From 9bdd6cc443a41e90a3c9dadb68721abb82af7b84 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 12 Oct 2024 23:51:34 +1100
Subject: [PATCH 01/17] init dragen R6 class

---
 R/tso.R        |   6 +-
 R/tso_dragen.R | 185 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 188 insertions(+), 3 deletions(-)
 create mode 100644 R/tso_dragen.R

diff --git a/R/tso.R b/R/tso.R
index 048e993..a302003 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -119,13 +119,13 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
     #' @description Read `MergedSmallVariants.vcf.gz` file.
     #' @param x Path to file.
     read_msv = function(x) {
-      dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = FALSE)
+      dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = TRUE)
       tibble::tibble(name = "mergedsmallv", data = list(dat))
     },
     #' @description Read `MergedSmallVariants.genome.vcf.gz` file.
     #' @param x Path to file.
     read_msvg = function(x) {
-      dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = FALSE)
+      dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = TRUE)
       tibble::tibble(name = "mergedsmallvg", data = list(dat))
     },
     #' @description Read `CombinedVariantOutput.tsv` file.
@@ -137,7 +137,7 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
     #' @description Read `CopyNumberVariants.vcf.gz` file.
     #' @param x Path to file.
     read_cnv = function(x) {
-      dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = FALSE)
+      dat <- bcftools_parse_vcf(x, only_pass = FALSE, alias = TRUE)
       tibble::tibble(name = "cnv", data = list(dat))
     },
     #' @description Read `fragment_length_hist.json.gz` file.
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
new file mode 100644
index 0000000..f7ac52b
--- /dev/null
+++ b/R/tso_dragen.R
@@ -0,0 +1,185 @@
+#' Wf_dragen R6 Class
+#'
+#' @description
+#' Reads and writes tidy versions of files from the `dragen` workflow.
+#'
+#' @examples
+#' \dontrun{
+#'
+#' #---- Local ----#
+#' p <- file.path(
+#'   "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
+#'   "analysis/cttsov2/20240915ff0295ed"
+#' )
+#' prefix <- "L2401290"
+#' t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix)
+#' t1$list_files(max_files = 100)
+#' t1$list_files_filter_relevant(max_files = 300)
+#' d <- t1$download_files(max_files = 100, dryrun = F)
+#' d_tidy <- t1$tidy_files(d)
+#' d_write <- t1$write(
+#'   d_tidy,
+#'   outdir = file.path(p, "dracarys_tidy"),
+#'   prefix = prefix,
+#'   format = "tsv"
+#' )
+#' }
+#' @export
+Wf_dragen <- R6::R6Class(
+  "Wf_dragen",
+  inherit = Wf,
+  public = list(
+    #' @field prefix The LibraryID prefix of the sample (needed for path lookup).
+    prefix = NULL,
+    #' @description Create a new Wf_dragen object.
+    #' @param path Path to directory with raw workflow results (from S3 or
+    #' local filesystem).
+    #' @param prefix The LibraryID prefix of the sample (needed for path lookup).
+    initialize = function(path = NULL, prefix = NULL) {
+      wname <- "dragen"
+      pref <- prefix
+      reg1 <- tibble::tribble(
+        ~regex, ~fun,
+        glue("{dc}/{pref}\\-replay\\.json$"), "replay",
+        glue("{dc}/{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.exon_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{dc}/{pref}\\.exon_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.exon_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.fragment_length_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{dc}/{pref}\\.target_bed_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.target_bed_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{dc}/{pref}\\.tmb_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.tmb_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.wgs_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{dc}/{pref}\\.wgs_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.wgs_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{dc}/{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY"
+      )
+      regexes <- reg1 |>
+        dplyr::mutate(
+          fun = paste0("read_", .data$fun),
+          fun = ifelse(.data$fun == "read_DOWNLOAD_ONLY", "DOWNLOAD_ONLY", .data$fun)
+        )
+
+      super$initialize(path = path, wname = wname, regexes = regexes)
+      self$prefix <- prefix
+    },
+    #' @description Print details about the Workflow.
+    #' @param ... (ignored).
+    print = function(...) {
+      res <- tibble::tribble(
+        ~var, ~value,
+        "path", self$path,
+        "wname", self$wname,
+        "filesystem", self$filesystem,
+        "prefix", self$prefix
+      )
+      print(res)
+      invisible(self)
+    },
+    #' @description Read `replay.json` file.
+    #' @param x Path to file.
+    read_replay = function(x) {
+      res <- x |>
+        jsonlite::read_json(simplifyVector = TRUE) |>
+        purrr::map_if(is.data.frame, tibble::as_tibble)
+      req_elements <- c("command_line", "hash_table_build", "dragen_config", "system")
+      assertthat::assert_that(all(names(res) %in% req_elements))
+      res[["system"]] <- res[["system"]] |>
+        tibble::as_tibble_row()
+      res[["hash_table_build"]] <- res[["hash_table_build"]] |>
+        tibble::as_tibble_row()
+      # we don't care if the columns are characters, no analysis likely to be done on dragen options
+      # (though never say never!)
+      res[["dragen_config"]] <- res[["dragen_config"]] |>
+        tidyr::pivot_wider(names_from = "name", values_from = "value")
+      return(dplyr::bind_cols(res))
+    },
+    #' @description Read `contig_mean_cov.csv` file.
+    #' @param x Path to file.
+    read_contigMeanCov = function(x) {
+      readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
+        dplyr::filter(
+          if (!keep_alt) {
+            !grepl("chrM|MT|_|Autosomal|HLA-|EBV", .data$chrom)
+          } else {
+            TRUE
+          }
+        )
+    },
+    #' @description Read `dragen.tsv.gz` cancer report hrd file.
+    #' @param x Path to file.
+    read_coverageMetrics = function(x) {
+      abbrev_nm <- c(
+        "Aligned bases"                                       = "bases_aligned_dragen",
+        "Aligned bases in genome"                             = "bases_aligned_in_genome_dragen",
+        "Average alignment coverage over genome"              = "cov_alignment_avg_over_genome_dragen",
+        "Uniformity of coverage (PCT > 0.2*mean) over genome" = "cov_uniformity_over_genome_pct_gt02mean_dragen",
+        "Uniformity of coverage (PCT > 0.4*mean) over genome" = "cov_uniformity_over_genome_pct_gt04mean_dragen",
+        "Average chr X coverage over genome"                  = "cov_avg_x_over_genome_dragen",
+        "Average chr Y coverage over genome"                  = "cov_avg_y_over_genome_dragen",
+        "Average mitochondrial coverage over genome"          = "cov_avg_mt_over_genome_dragen",
+        "Average autosomal coverage over genome"              = "cov_avg_auto_over_genome_dragen",
+        "Median autosomal coverage over genome"               = "cov_median_auto_over_genome_dragen",
+        "Mean/Median autosomal coverage ratio over genome"    = "cov_mean_median_auto_ratio_over_genome_dragen",
+        "Aligned reads"                                       = "reads_aligned_dragen",
+        "Aligned reads in genome"                             = "reads_aligned_in_genome_dragen"
+      )
+      raw <- readr::read_lines(x)
+      assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1]))
+
+      res <- raw |>
+        tibble::as_tibble_col(column_name = "value") |>
+        tidyr::separate_wider_delim(
+          "value",
+          delim = ",", too_few = "align_start",
+          names = c("category", "dummy1", "var", "value", "pct")
+        )
+      # split to rename the
+      # "PCT of genome with coverage [100x: inf)" values
+      res1 <- res |>
+        # pct just shows 100% for a couple rows
+        dplyr::filter(!grepl("PCT of genome with coverage", .data$var)) |>
+        dplyr::select("var", "value")
+      res2 <- res |>
+        dplyr::filter(grepl("PCT of genome with coverage", .data$var)) |>
+        dplyr::mutate(
+          var = sub("PCT of genome with coverage ", "", .data$var),
+          var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var),
+          var = gsub("x", "", .data$var),
+          var = gsub("inf", "Inf", .data$var)
+        ) |>
+        tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
+        dplyr::mutate(var = as.character(glue("cov_genome_pct_{start}_{end}_dragen"))) |>
+        dplyr::select("var", "value")
+      res <- dplyr::bind_rows(res1, res2) |>
+        dplyr::mutate(
+          value = dplyr::na_if(.data$value, "NA"),
+          value = as.numeric(.data$value),
+          var = dplyr::recode(.data$var, !!!abbrev_nm)
+        ) |>
+        tidyr::pivot_wider(names_from = "var", values_from = "value")
+      return(res)
+    },
+  ) # end public
+) # end Wf_dragen

From 283db95f7496280a80cc9b81f21dec6db656858e Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sun, 13 Oct 2024 23:38:54 +1100
Subject: [PATCH 02/17] alignqc: join metrics tabs with meta

---
 .../alignment_qc/dl_and_tidy.R                |  4 +-
 .../umccr_workflows/alignment_qc/summary.Rmd  | 60 ++++++++++++-------
 2 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R b/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R
index f5ed36b..9fbfa12 100755
--- a/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R
+++ b/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R
@@ -14,8 +14,6 @@ c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION") |>
   stopifnot()
 icav1_token <- Sys.getenv("ICA_ACCESS_TOKEN") |>
   dracarys::ica_token_validate()
-# this helps keep annoying reticulate prompt away
-Sys.setenv(RETICULATE_PYTHON = Sys.getenv("CONDA_PYTHON_EXE"))
 
 query_workflow_alignqc <- function(start_date) {
   wfs <- c("wgs_alignment_qc", "wts_alignment_qc") |>
@@ -37,7 +35,7 @@ query_limsrow_libids <- function(libids) {
 }
 
 # first read in the workflows table, extract metadata, then join with lims
-start_date <- "2024-09-27"
+start_date <- "2024-10-11"
 p_raw <- query_workflow_alignqc(start_date)
 
 wgs <- p_raw |>
diff --git a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
index b40fbe7..95807e2 100644
--- a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
+++ b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
@@ -9,7 +9,7 @@ output:
     highlight: kate
 params:
   title: "UMCCR Alignment QC Summary Report"
-  meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-27_wgts.rds")
+  meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-10-11_wgts.rds")
 description: "UMCCR Alignment QC Summary Report"
 title: "`r params$title`"
 ---
@@ -239,7 +239,14 @@ conf <- list(
   pink_range = c(8, 20)
 )
 d_map |>
-  dt_view() |>
+  left_join(
+    meta |>
+      mutate(umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$lane}")) |>
+      select(umccrid, assay, workflow, project_name, project_owner),
+    by = "umccrid"
+  ) |>
+  select(umccrid, phenotype, type, source, assay, workflow, project_name, project_owner, everything()) |>
+  dt_view(scroll_y = 1500) |>
   DT::formatCurrency(columns = names(d_map)[num_pct_cols], currency = "", digits = 1) |>
   DT::formatCurrency(columns = names(d_map)[no_numpct_cols], currency = "", digits = 0) |>
   DT::formatStyle(
@@ -304,7 +311,14 @@ d_cvg <- dr_unnest("WgsCoverageMetricsFile") |>
   )
 num_cols <- names(d_cvg)[purrr::map_lgl(d_cvg, is.numeric)]
 d_cvg |>
-  dt_view() |>
+  left_join(
+    meta |>
+      mutate(umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$lane}")) |>
+      select(umccrid, assay, workflow, project_name, project_owner),
+    by = "umccrid"
+  ) |>
+  select(umccrid, phenotype, type, source, assay, workflow, project_name, project_owner, everything()) |>
+  dt_view(scroll_y = 1500) |>
   DT::formatCurrency(columns = num_cols, currency = "", digits = 1) |>
   DT::formatStyle(
     "ploidy",
@@ -392,7 +406,7 @@ f1_plot <- ggplot() +
     fill = rep(fqc_colours1$col, length(unique(f1$type))),
     alpha = 0.7
   ) +
-  geom_line(data = f1, aes(x = q, y = prop, colour = umccrid, linetype = mate), linewidth = 1) +
+  geom_line(data = f1, aes(x = q, y = prop, colour = umccrid, linetype = mate), linewidth = 1, show.legend = FALSE) +
   scale_y_continuous(labels = scales::label_comma()) +
   theme(panel.grid.major = element_blank()) +
   facet_wrap(~type, ncol = 1) +
@@ -401,8 +415,8 @@ f1_plot <- ggplot() +
     subtitle = glue("Percentage of reads with average quality scores. Shows if\na subset of reads has poor quality.")
   )
 
-# plotly::ggplotly(f1_plot)
-f1_plot
+plotly::ggplotly(f1_plot)
+# f1_plot
 ```
 
 ### GC Content ('Per-Sequence GC Content')
@@ -427,8 +441,8 @@ gc_data_plot <- gc_data |>
     title = "Read GC Content",
     subtitle = glue("Total number of reads with each GC content\npercentile between 0% and 100%")
   )
-# plotly::ggplotly(gc_data_plot)
-gc_data_plot
+plotly::ggplotly(gc_data_plot)
+# gc_data_plot
 ```
 
 ### GC Content Quality ('GC Content Mean Quality Scores')
@@ -454,15 +468,15 @@ f1_plot <- ggplot() +
     title = "GC Content Quality",
     subtitle = glue("Average Phred-scale read mean quality for reads with\neach GC content percentile between 0% and 100%.")
   )
-# plotly::ggplotly(f1_plot)
-f1_plot
+plotly::ggplotly(f1_plot)
+# f1_plot
 ```
 
 ### Positional Base Content ('Per-Position Sequence Content')
 
 - TODO: create heatmap instead
 
-```{r fqc_pbc, eval=T, fig.height=42}
+```{r fqc_pbc, eval=F, fig.height=42}
 f1 <- dr_unnest("FastqcMetricsFile_positional_base_content")
 f1 |>
   filter(base != "N") |>
@@ -484,7 +498,7 @@ f1 |>
 ### Positional Base Mean Quality ('Per-Position Mean Quality Scores')
 
 
-```{r fqc_bmq, eval=T, fig.height=80}
+```{r fqc_bmq, eval=F, fig.height=80}
 f1 <- dr_unnest("FastqcMetricsFile_positional_base_mean_quality")
 ggplot() +
   geom_rect(
@@ -508,7 +522,7 @@ ggplot() +
 
 ### Positional Quality ('Per-Position Quality Score Ranges')
 
-```{r fqc_pq, eval=T, fig.width=13}
+```{r fqc_pq, eval=F, fig.width=13}
 # TODO: use boxplot instead of point
 f1 <- dr_unnest("FastqcMetricsFile_positional_quality")
 quants <- c(25, 50, 75)
@@ -546,8 +560,8 @@ read_len_plot <- read_len |>
     title = "Read Lengths",
     subtitle = glue("Read percentage with each observed length.")
   )
-# plotly::ggplotly(read_len_plot)
-read_len_plot
+plotly::ggplotly(read_len_plot)
+# read_len_plot
 ```
 
 ### Sequence Positions ('Adapter Content')
@@ -569,7 +583,7 @@ f1 |>
 
 ## Coverage {.tabset .tabset-pills}
 
-```{r contig_cvg, eval=T, results='asis', fig.height=5}
+```{r contig_cvg, eval=F, results='asis', fig.height=5}
 d1 <- dr_unnest("WgsContigMeanCovFile") |>
   arrange(desc("umccrid"))
 for (type1 in sort(unique(d1$type), decreasing = FALSE)) {
@@ -608,13 +622,13 @@ flp <- fl1 |>
   xlab("Fragment Length (bp)") +
   ylab(glue("Read Count (min: {min_count})")) +
   theme(
-    legend.position = c(0.9, 0.9),
+    legend.position.inside = c(0.9, 0.9),
     legend.justification = c(1, 1),
     panel.grid.minor = ggplot2::element_blank(),
     plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold")
   )
-# plotly::ggplotly(flp)
-flp
+plotly::ggplotly(flp)
+# flp
 ```
 
 ---
@@ -638,8 +652,8 @@ d_pl_plot <- d_pl_plot_data |>
   geom_line(aes(colour = umccrid, group = umccrid), na.rm = TRUE) +
   geom_point(aes(colour = umccrid), na.rm = TRUE) +
   labs(title = "Chromosome Median / Autosomal Median")
-# plotly::ggplotly(d_pl_plot)
-d_pl_plot
+plotly::ggplotly(d_pl_plot)
+# d_pl_plot
 ```
 
 
@@ -671,12 +685,12 @@ d_hist2 <- d_hist |>
     x = "Coverage", y = "PCT Cumsum",
     title = "Percentage of sites in genome with given coverage"
   )
-plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, nrows = 2)
+# plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, nrows = 2)
 ```
 
 ## FineHist
 
-```{r finehist, eval=T, fig.height=10, fig.width=12}
+```{r finehist, eval=F, fig.height=10, fig.width=12}
 d_fhist <- dr_unnest("WgsFineHistFile")
 d_fhist |>
   dracarys::WgsFineHistFile$public_methods$plot(c(0, 150)) +

From 733476843ce0937e4568dc45782c26fa978406cf Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 14 Oct 2024 23:58:33 +1100
Subject: [PATCH 03/17] dragen: generalise coverage metrics

---
 NAMESPACE        |   1 +
 R/tso_dragen.R   | 152 ++++++++++++++++++++++++-----------------
 man/Wf_dragen.Rd | 171 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+), 60 deletions(-)
 create mode 100644 man/Wf_dragen.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4cf547e..ec8e62c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,7 @@ export(TimeMetricsFile)
 export(TrimmerMetricsFile)
 export(VCMetricsFile)
 export(Wf)
+export(Wf_dragen)
 export(Wf_sash)
 export(Wf_sash_download_tidy_write)
 export(Wf_tso_ctdna_tumor_only)
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index f7ac52b..e7dd346 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -7,12 +7,13 @@
 #' \dontrun{
 #'
 #' #---- Local ----#
+#' prefix <- "L2401290"
 #' p <- file.path(
 #'   "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
-#'   "analysis/cttsov2/20240915ff0295ed"
+#'   "analysis/cttsov2/20240915ff0295ed/Logs_Intermediates/DragenCaller",
+#'   prefix
 #' )
-#' prefix <- "L2401290"
-#' t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix)
+#' t1 <- Wf_dragen$new(path = p, prefix = prefix)
 #' t1$list_files(max_files = 100)
 #' t1$list_files_filter_relevant(max_files = 300)
 #' d <- t1$download_files(max_files = 100, dryrun = F)
@@ -40,40 +41,40 @@ Wf_dragen <- R6::R6Class(
       pref <- prefix
       reg1 <- tibble::tribble(
         ~regex, ~fun,
-        glue("{dc}/{pref}\\-replay\\.json$"), "replay",
-        glue("{dc}/{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{dc}/{pref}\\.exon_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.fragment_length_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{dc}/{pref}\\.target_bed_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{dc}/{pref}\\.tmb_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{dc}/{pref}\\.wgs_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY"
+        glue("{pref}\\-replay\\.json$"), "replay",
+        glue("{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.exon_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.exon_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.exon_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.fragment_length_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.target_bed_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.tmb_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.tmb_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.wgs_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.wgs_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.wgs_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY"
       )
       regexes <- reg1 |>
         dplyr::mutate(
@@ -117,11 +118,12 @@ Wf_dragen <- R6::R6Class(
     },
     #' @description Read `contig_mean_cov.csv` file.
     #' @param x Path to file.
-    read_contigMeanCov = function(x) {
+    #' @param keep_alt Keep ALT contigs.
+    read_contigMeanCov = function(x, keep_alt = FALSE) {
       readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
         dplyr::filter(
           if (!keep_alt) {
-            !grepl("chrM|MT|_|Autosomal|HLA-|EBV", .data$chrom)
+            !grepl("chrM|MT|_|Autosomal|HLA-|EBV|GL|hs37d5", .data$chrom)
           } else {
             TRUE
           }
@@ -130,24 +132,26 @@ Wf_dragen <- R6::R6Class(
     #' @description Read `dragen.tsv.gz` cancer report hrd file.
     #' @param x Path to file.
     read_coverageMetrics = function(x) {
-      abbrev_nm <- c(
-        "Aligned bases"                                       = "bases_aligned_dragen",
-        "Aligned bases in genome"                             = "bases_aligned_in_genome_dragen",
-        "Average alignment coverage over genome"              = "cov_alignment_avg_over_genome_dragen",
-        "Uniformity of coverage (PCT > 0.2*mean) over genome" = "cov_uniformity_over_genome_pct_gt02mean_dragen",
-        "Uniformity of coverage (PCT > 0.4*mean) over genome" = "cov_uniformity_over_genome_pct_gt04mean_dragen",
-        "Average chr X coverage over genome"                  = "cov_avg_x_over_genome_dragen",
-        "Average chr Y coverage over genome"                  = "cov_avg_y_over_genome_dragen",
-        "Average mitochondrial coverage over genome"          = "cov_avg_mt_over_genome_dragen",
-        "Average autosomal coverage over genome"              = "cov_avg_auto_over_genome_dragen",
-        "Median autosomal coverage over genome"               = "cov_median_auto_over_genome_dragen",
-        "Mean/Median autosomal coverage ratio over genome"    = "cov_mean_median_auto_ratio_over_genome_dragen",
-        "Aligned reads"                                       = "reads_aligned_dragen",
-        "Aligned reads in genome"                             = "reads_aligned_in_genome_dragen"
+      # all rows except 'Aligned bases' and 'Aligned reads' refer to the region
+      abbrev_nm <- tibble::tribble(
+        ~raw, ~clean, ~region,
+        "Aligned bases", "bases_aligned_tot_dragen", FALSE,
+        "Aligned reads", "reads_aligned_tot_dragen", FALSE,
+        "Aligned bases in ", "bases_aligned_", TRUE,
+        "Average alignment coverage over ", "cov_alignment_avg_over_", TRUE,
+        "Uniformity of coverage (PCT > 0.2*mean) over ", "cov_uniformity_pct_gt02mean_", TRUE,
+        "Uniformity of coverage (PCT > 0.4*mean) over ", "cov_uniformity_pct_gt04mean_", TRUE,
+        "Average chr X coverage over ", "cov_avg_x_over_", TRUE,
+        "Average chr Y coverage over ", "cov_avg_y_over_", TRUE,
+        "Average mitochondrial coverage over ", "cov_avg_mt_over_", TRUE,
+        "Average autosomal coverage over ", "cov_avg_auto_over_", TRUE,
+        "Median autosomal coverage over ", "cov_median_auto_over_", TRUE,
+        "Mean/Median autosomal coverage ratio over ", "cov_mean_median_auto_ratio_over_", TRUE,
+        "Aligned reads in ", "reads_aligned_in_", TRUE
       )
       raw <- readr::read_lines(x)
       assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1]))
-
+      # first detect if this is genome, QC coverage region, or target region
       res <- raw |>
         tibble::as_tibble_col(column_name = "value") |>
         tidyr::separate_wider_delim(
@@ -155,22 +159,50 @@ Wf_dragen <- R6::R6Class(
           delim = ",", too_few = "align_start",
           names = c("category", "dummy1", "var", "value", "pct")
         )
+      reg1 <- NULL
+      str1 <- NULL
+      tmp <- res |>
+        dplyr::filter(grepl("PCT of .* with coverage ", .data$var)) |>
+        dplyr::slice_head(n = 1) |>
+        dplyr::pull("var")
+      assertthat::assert_that(length(tmp) == 1)
+      if (grepl("genome", tmp)) {
+        str1 <- "genome"
+        reg1 <- "genome"
+      } else if (grepl("QC coverage region", tmp)) {
+        str1 <- "QC coverage region"
+        reg1 <- "qccovreg"
+      } else if (grepl("target region", tmp)) {
+        str1 <- "target region"
+        reg1 <- "targetreg"
+      } else {
+        cli::cli_abort("Cannot determine the coverage region from: {x}")
+      }
+      abbrev_nm <- abbrev_nm |>
+        dplyr::mutate(
+          raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw),
+          clean = ifelse(.data$region, glue("{.data$clean}{reg1}_dragen"), .data$clean)
+        ) |>
+        dplyr::select("raw", "clean") |>
+        tibble::deframe()
       # split to rename the
       # "PCT of genome with coverage [100x: inf)" values
+      pat <- glue("PCT of {str1} with coverage ")
       res1 <- res |>
-        # pct just shows 100% for a couple rows
-        dplyr::filter(!grepl("PCT of genome with coverage", .data$var)) |>
+        # pct just shows % for a couple rows which can be
+        # calculated from their above values
+        dplyr::filter(!grepl(pat, .data$var)) |>
         dplyr::select("var", "value")
       res2 <- res |>
-        dplyr::filter(grepl("PCT of genome with coverage", .data$var)) |>
+        dplyr::filter(grepl(pat, .data$var)) |>
         dplyr::mutate(
-          var = sub("PCT of genome with coverage ", "", .data$var),
+          var = sub(pat, "", .data$var),
           var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var),
           var = gsub("x", "", .data$var),
           var = gsub("inf", "Inf", .data$var)
         ) |>
         tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
-        dplyr::mutate(var = as.character(glue("cov_genome_pct_{start}_{end}_dragen"))) |>
+        dplyr::mutate(var = as.character(glue("cov_pct_{start}_{end}_{region}_dragen"))) |>
         dplyr::select("var", "value")
       res <- dplyr::bind_rows(res1, res2) |>
         dplyr::mutate(
@@ -180,6 +212,6 @@ Wf_dragen <- R6::R6Class(
         ) |>
         tidyr::pivot_wider(names_from = "var", values_from = "value")
       return(res)
-    },
+    }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
new file mode 100644
index 0000000..1c57315
--- /dev/null
+++ b/man/Wf_dragen.Rd
@@ -0,0 +1,171 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tso_dragen.R
+\name{Wf_dragen}
+\alias{Wf_dragen}
+\title{Wf_dragen R6 Class}
+\description{
+Reads and writes tidy versions of files from the \code{dragen} workflow.
+}
+\examples{
+\dontrun{
+
+#---- Local ----#
+prefix <- "L2401290"
+p <- file.path(
+  "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
+  "analysis/cttsov2/20240915ff0295ed/Logs_Intermediates/DragenCaller",
+  prefix
+)
+t1 <- Wf_dragen$new(path = p, prefix = prefix)
+t1$list_files(max_files = 100)
+t1$list_files_filter_relevant(max_files = 300)
+d <- t1$download_files(max_files = 100, dryrun = F)
+d_tidy <- t1$tidy_files(d)
+d_write <- t1$write(
+  d_tidy,
+  outdir = file.path(p, "dracarys_tidy"),
+  prefix = prefix,
+  format = "tsv"
+)
+}
+}
+\section{Super class}{
+\code{\link[dracarys:Wf]{dracarys::Wf}} -> \code{Wf_dragen}
+}
+\section{Public fields}{
+\if{html}{\out{<div class="r6-fields">}}
+\describe{
+\item{\code{prefix}}{The LibraryID prefix of the sample (needed for path lookup).}
+}
+\if{html}{\out{</div>}}
+}
+\section{Methods}{
+\subsection{Public methods}{
+\itemize{
+\item \href{#method-Wf_dragen-new}{\code{Wf_dragen$new()}}
+\item \href{#method-Wf_dragen-print}{\code{Wf_dragen$print()}}
+\item \href{#method-Wf_dragen-read_replay}{\code{Wf_dragen$read_replay()}}
+\item \href{#method-Wf_dragen-read_contigMeanCov}{\code{Wf_dragen$read_contigMeanCov()}}
+\item \href{#method-Wf_dragen-read_coverageMetrics}{\code{Wf_dragen$read_coverageMetrics()}}
+\item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
+}
+}
+\if{html}{\out{
+<details open><summary>Inherited methods</summary>
+<ul>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="download_files"><a href='../../dracarys/html/Wf.html#method-Wf-download_files'><code>dracarys::Wf$download_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files"><a href='../../dracarys/html/Wf.html#method-Wf-list_files'><code>dracarys::Wf$list_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files_filter_relevant"><a href='../../dracarys/html/Wf.html#method-Wf-list_files_filter_relevant'><code>dracarys::Wf$list_files_filter_relevant()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="tidy_files"><a href='../../dracarys/html/Wf.html#method-Wf-tidy_files'><code>dracarys::Wf$tidy_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="write"><a href='../../dracarys/html/Wf.html#method-Wf-write'><code>dracarys::Wf$write()</code></a></span></li>
+</ul>
+</details>
+}}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-new"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-new}{}}}
+\subsection{Method \code{new()}}{
+Create a new Wf_dragen object.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$new(path = NULL, prefix = NULL)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{path}}{Path to directory with raw workflow results (from S3 or
+local filesystem).}
+
+\item{\code{prefix}}{The LibraryID prefix of the sample (needed for path lookup).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-print"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-print}{}}}
+\subsection{Method \code{print()}}{
+Print details about the Workflow.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$print(...)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(ignored).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_replay"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_replay}{}}}
+\subsection{Method \code{read_replay()}}{
+Read \code{replay.json} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_replay(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_contigMeanCov"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_contigMeanCov}{}}}
+\subsection{Method \code{read_contigMeanCov()}}{
+Read \code{contig_mean_cov.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_contigMeanCov(x, keep_alt = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+
+\item{\code{keep_alt}}{Keep ALT contigs.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_coverageMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_coverageMetrics}{}}}
+\subsection{Method \code{read_coverageMetrics()}}{
+Read \code{dragen.tsv.gz} cancer report hrd file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_coverageMetrics(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-clone"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-clone}{}}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$clone(deep = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{deep}}{Whether to make a deep clone.}
+}
+\if{html}{\out{</div>}}
+}
+}
+}

From c0ef098f6b8fdca54406c0096bd8168bf3e7d56c Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Tue, 15 Oct 2024 10:20:27 +1100
Subject: [PATCH 04/17] dragen: more parsers

---
 NAMESPACE                                     |   1 +
 R/Wf.R                                        |   3 +-
 R/dragen.R                                    |  90 ++++++++++++++
 R/tso_dragen.R                                | 116 ++++++------------
 man/Wf_dragen.Rd                              |  42 ++++++-
 man/dragen_coverage_metrics_read.Rd           |  17 +++
 .../test-roxytest-testexamples-dragen.R       |   2 +-
 7 files changed, 185 insertions(+), 86 deletions(-)
 create mode 100644 man/dragen_coverage_metrics_read.Rd

diff --git a/NAMESPACE b/NAMESPACE
index ec8e62c..859e3dc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -36,6 +36,7 @@ export(dr_func_eval)
 export(dr_gds_download)
 export(dr_output_format_valid)
 export(dr_s3_download)
+export(dragen_coverage_metrics_read)
 export(dtw_Wf_tso_ctdna_tumor_only)
 export(dtw_Wf_tso_ctdna_tumor_only_v2)
 export(empty_tbl)
diff --git a/R/Wf.R b/R/Wf.R
index 7c2edab..4f23fc6 100644
--- a/R/Wf.R
+++ b/R/Wf.R
@@ -92,7 +92,8 @@ Wf <- R6::R6Class(
         "oncoanalyser_wgts_existing_both",
         "sash"
       )
-      assertthat::assert_that(wname %in% wnames)
+      subwnames <- c("dragen")
+      assertthat::assert_that(wname %in% c(wnames, subwnames))
       self$path <- sub("/$", "", path) # remove potential trailing slash
       self$wname <- wname
       self$filesystem <- dplyr::case_when(
diff --git a/R/dragen.R b/R/dragen.R
index 7e7459c..95790b0 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1,3 +1,93 @@
+#' Read DRAGEN Coverage Metrics
+#'
+#' Reads the `coverage_metrics.csv` file generated by DRAGEN.
+#' @param x Path to file.
+#'
+#' @return Tibble with data in list column.
+#' @export
+dragen_coverage_metrics_read <- function(x) {
+  # all rows except 'Aligned bases' and 'Aligned reads' refer to the region
+  abbrev_nm <- tibble::tribble(
+    ~raw, ~clean, ~region,
+    "Aligned bases", "bases_aligned_tot_dragen", FALSE,
+    "Aligned reads", "reads_aligned_tot_dragen", FALSE,
+    "Aligned bases in ", "bases_aligned_", TRUE,
+    "Average alignment coverage over ", "cov_alignment_avg_over_", TRUE,
+    "Uniformity of coverage (PCT > 0.2*mean) over ", "cov_uniformity_pct_gt02mean_", TRUE,
+    "Uniformity of coverage (PCT > 0.4*mean) over ", "cov_uniformity_pct_gt04mean_", TRUE,
+    "Average chr X coverage over ", "cov_avg_x_over_", TRUE,
+    "Average chr Y coverage over ", "cov_avg_y_over_", TRUE,
+    "Average mitochondrial coverage over ", "cov_avg_mt_over_", TRUE,
+    "Average autosomal coverage over ", "cov_avg_auto_over_", TRUE,
+    "Median autosomal coverage over ", "cov_median_auto_over_", TRUE,
+    "Mean/Median autosomal coverage ratio over ", "cov_mean_median_auto_ratio_over_", TRUE,
+    "Aligned reads in ", "reads_aligned_in_", TRUE
+  )
+  raw <- readr::read_lines(x)
+  assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1]))
+  # first detect if this is genome, QC coverage region, or target region
+  res <- raw |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim(
+      "value",
+      delim = ",", too_few = "align_start",
+      names = c("category", "dummy1", "var", "value", "pct")
+    )
+  reg1 <- NULL
+  str1 <- NULL
+  tmp <- res |>
+    dplyr::filter(grepl("PCT of .* with coverage ", .data$var)) |>
+    dplyr::slice_head(n = 1) |>
+    dplyr::pull("var")
+  assertthat::assert_that(length(tmp) == 1)
+  if (grepl("genome", tmp)) {
+    str1 <- "genome"
+    reg1 <- "genome"
+  } else if (grepl("QC coverage region", tmp)) {
+    str1 <- "QC coverage region"
+    reg1 <- "qccovreg"
+  } else if (grepl("target region", tmp)) {
+    str1 <- "target region"
+    reg1 <- "targetreg"
+  } else {
+    cli::cli_abort("Cannot determine the coverage region from: {x}")
+  }
+  abbrev_nm <- abbrev_nm |>
+    dplyr::mutate(
+      raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw),
+      clean = ifelse(.data$region, glue("{.data$clean}{reg1}_dragen"), .data$clean)
+    ) |>
+    dplyr::select("raw", "clean") |>
+    tibble::deframe()
+  # split to rename the
+  # "PCT of genome with coverage [100x: inf)" values
+  pat <- glue("PCT of {str1} with coverage ")
+  res1 <- res |>
+    # pct just shows % for a couple rows which can be
+    # calculated from their above values
+    dplyr::filter(!grepl(pat, .data$var)) |>
+    dplyr::select("var", "value")
+  res2 <- res |>
+    dplyr::filter(grepl(pat, .data$var)) |>
+    dplyr::mutate(
+      var = sub(pat, "", .data$var),
+      var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var),
+      var = gsub("x", "", .data$var),
+      var = gsub("inf", "Inf", .data$var)
+    ) |>
+    tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
+    dplyr::mutate(var = as.character(glue("cov_pct_{start}_{end}_{reg1}_dragen"))) |>
+    dplyr::select("var", "value")
+  res <- dplyr::bind_rows(res1, res2) |>
+    dplyr::mutate(
+      value = dplyr::na_if(.data$value, "NA"),
+      value = as.numeric(.data$value),
+      var = dplyr::recode(.data$var, !!!abbrev_nm)
+    ) |>
+    tidyr::pivot_wider(names_from = "var", values_from = "value")
+  return(res)
+}
+
 #' WgsContigMeanCovFile R6 Class
 #'
 #' @description
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index e7dd346..e37bd53 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -13,8 +13,8 @@
 #'   "analysis/cttsov2/20240915ff0295ed/Logs_Intermediates/DragenCaller",
 #'   prefix
 #' )
-#' t1 <- Wf_dragen$new(path = p, prefix = prefix)
-#' t1$list_files(max_files = 100)
+#' d1 <- Wf_dragen$new(path = p, prefix = prefix)
+#' d1$list_files(max_files = 100)
 #' t1$list_files_filter_relevant(max_files = 300)
 #' d <- t1$download_files(max_files = 100, dryrun = F)
 #' d_tidy <- t1$tidy_files(d)
@@ -114,13 +114,14 @@ Wf_dragen <- R6::R6Class(
       # (though never say never!)
       res[["dragen_config"]] <- res[["dragen_config"]] |>
         tidyr::pivot_wider(names_from = "name", values_from = "value")
-      return(dplyr::bind_cols(res))
+      dat <- dplyr::bind_cols(res)
+      tibble::tibble(name = "replay", data = list(dat))
     },
     #' @description Read `contig_mean_cov.csv` file.
     #' @param x Path to file.
     #' @param keep_alt Keep ALT contigs.
     read_contigMeanCov = function(x, keep_alt = FALSE) {
-      readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
+      dat <- readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
         dplyr::filter(
           if (!keep_alt) {
             !grepl("chrM|MT|_|Autosomal|HLA-|EBV|GL|hs37d5", .data$chrom)
@@ -128,90 +129,43 @@ Wf_dragen <- R6::R6Class(
             TRUE
           }
         )
+      tibble::tibble(name = "contigmeancov", data = list(dat[]))
     },
-    #' @description Read `dragen.tsv.gz` cancer report hrd file.
+    #' @description Read `coverage_metrics.csv` file.
     #' @param x Path to file.
     read_coverageMetrics = function(x) {
-      # all rows except 'Aligned bases' and 'Aligned reads' refer to the region
-      abbrev_nm <- tibble::tribble(
-        ~raw, ~clean, ~region,
-        "Aligned bases", "bases_aligned_tot_dragen", FALSE,
-        "Aligned reads", "reads_aligned_tot_dragen", FALSE,
-        "Aligned bases in ", "bases_aligned_", TRUE,
-        "Average alignment coverage over ", "cov_alignment_avg_over_", TRUE,
-        "Uniformity of coverage (PCT > 0.2*mean) over ", "cov_uniformity_pct_gt02mean_", TRUE,
-        "Uniformity of coverage (PCT > 0.4*mean) over ", "cov_uniformity_pct_gt04mean_", TRUE,
-        "Average chr X coverage over ", "cov_avg_x_over_", TRUE,
-        "Average chr Y coverage over ", "cov_avg_y_over_", TRUE,
-        "Average mitochondrial coverage over ", "cov_avg_mt_over_", TRUE,
-        "Average autosomal coverage over ", "cov_avg_auto_over_", TRUE,
-        "Median autosomal coverage over ", "cov_median_auto_over_", TRUE,
-        "Mean/Median autosomal coverage ratio over ", "cov_mean_median_auto_ratio_over_", TRUE,
-        "Aligned reads in ", "reads_aligned_in_", TRUE
-      )
-      raw <- readr::read_lines(x)
-      assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1]))
-      # first detect if this is genome, QC coverage region, or target region
-      res <- raw |>
-        tibble::as_tibble_col(column_name = "value") |>
-        tidyr::separate_wider_delim(
-          "value",
-          delim = ",", too_few = "align_start",
-          names = c("category", "dummy1", "var", "value", "pct")
-        )
-      reg1 <- NULL
-      str1 <- NULL
-      tmp <- res |>
-        dplyr::filter(grepl("PCT of .* with coverage ", .data$var)) |>
-        dplyr::slice_head(n = 1) |>
-        dplyr::pull("var")
-      assertthat::assert_that(length(tmp) == 1)
-      if (grepl("genome", tmp)) {
-        str1 <- "genome"
-        reg1 <- "genome"
-      } else if (grepl("QC coverage region", tmp)) {
-        str1 <- "QC coverage region"
-        reg1 <- "qccovreg"
-      } else if (grepl("target region", tmp)) {
-        str1 <- "target region"
-        reg1 <- "targetreg"
-      } else {
-        cli::cli_abort("Cannot determine the coverage region from: {x}")
-      }
-      abbrev_nm <- abbrev_nm |>
-        dplyr::mutate(
-          raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw),
-          clean = ifelse(.data$region, glue("{.data$clean}{reg1}_dragen"), .data$clean)
-        ) |>
-        dplyr::select("raw", "clean") |>
-        tibble::deframe()
-      # split to rename the
-      # "PCT of genome with coverage [100x: inf)" values
-      pat <- glue("PCT of {str1} with coverage ")
-      res1 <- res |>
-        # pct just shows % for a couple rows which can be
-        # calculated from their above values
-        dplyr::filter(!grepl(pat, .data$var)) |>
-        dplyr::select("var", "value")
-      res2 <- res |>
-        dplyr::filter(grepl(pat, .data$var)) |>
+      dat <- dragen_coverage_metrics_read(x)
+      tibble::tibble(name = "covmetrics", data = list(dat))
+    },
+    #' @description Read `fine_hist.csv` file.
+    #' @param x Path to file.
+    read_fineHist = function(x) {
+      d <- readr::read_csv(x, col_types = "cd")
+      assertthat::assert_that(all(colnames(d) == c("Depth", "Overall")))
+      # there's a max Depth of 2000+, so convert to numeric for easier plotting
+      dat <- d |>
         dplyr::mutate(
-          var = sub(pat, "", .data$var),
-          var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var),
-          var = gsub("x", "", .data$var),
-          var = gsub("inf", "Inf", .data$var)
+          Depth = ifelse(grepl("+", .data$Depth), sub("(\\d*)\\+", "\\1", .data$Depth), .data$Depth),
+          Depth = as.integer(.data$Depth)
         ) |>
-        tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
-        dplyr::mutate(var = as.character(glue("cov_pct_{start}_{end}_{region}_dragen"))) |>
-        dplyr::select("var", "value")
-      res <- dplyr::bind_rows(res1, res2) |>
+        dplyr::select(depth = "Depth", n_loci = "Overall")
+      tibble::tibble(name = "finehist", data = list(dat))
+    },
+    #' @description Read `fragment_length_hist.csv` file.
+    #' @param x Path to file.
+    read_fragmentLengthHist = function(x) {
+      d <- readr::read_lines(x)
+      assertthat::assert_that(grepl("#Sample", d[1]))
+      dat <- d |>
+        tibble::enframe(name = "name", value = "value") |>
+        dplyr::filter(!grepl("#Sample: |FragmentLength,Count", .data$value)) |>
+        tidyr::separate_wider_delim(cols = "value", names = c("fragmentLength", "count"), delim = ",") |>
         dplyr::mutate(
-          value = dplyr::na_if(.data$value, "NA"),
-          value = as.numeric(.data$value),
-          var = dplyr::recode(.data$var, !!!abbrev_nm)
+          count = as.numeric(.data$count),
+          fragmentLength = as.numeric(.data$fragmentLength)
         ) |>
-        tidyr::pivot_wider(names_from = "var", values_from = "value")
-      return(res)
+        dplyr::select("fragmentLength", "count")
+      tibble::tibble(name = "fraglen", data = list(dat))
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 1c57315..3402faf 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -16,8 +16,8 @@ p <- file.path(
   "analysis/cttsov2/20240915ff0295ed/Logs_Intermediates/DragenCaller",
   prefix
 )
-t1 <- Wf_dragen$new(path = p, prefix = prefix)
-t1$list_files(max_files = 100)
+d1 <- Wf_dragen$new(path = p, prefix = prefix)
+d1$list_files(max_files = 100)
 t1$list_files_filter_relevant(max_files = 300)
 d <- t1$download_files(max_files = 100, dryrun = F)
 d_tidy <- t1$tidy_files(d)
@@ -47,6 +47,8 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_replay}{\code{Wf_dragen$read_replay()}}
 \item \href{#method-Wf_dragen-read_contigMeanCov}{\code{Wf_dragen$read_contigMeanCov()}}
 \item \href{#method-Wf_dragen-read_coverageMetrics}{\code{Wf_dragen$read_coverageMetrics()}}
+\item \href{#method-Wf_dragen-read_fineHist}{\code{Wf_dragen$read_fineHist()}}
+\item \href{#method-Wf_dragen-read_fragmentLengthHist}{\code{Wf_dragen$read_fragmentLengthHist()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -138,11 +140,45 @@ Read \code{contig_mean_cov.csv} file.
 \if{html}{\out{<a id="method-Wf_dragen-read_coverageMetrics"></a>}}
 \if{latex}{\out{\hypertarget{method-Wf_dragen-read_coverageMetrics}{}}}
 \subsection{Method \code{read_coverageMetrics()}}{
-Read \code{dragen.tsv.gz} cancer report hrd file.
+Read \code{coverage_metrics.csv} file.
 \subsection{Usage}{
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_coverageMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_fineHist"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_fineHist}{}}}
+\subsection{Method \code{read_fineHist()}}{
+Read \code{fine_hist.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_fineHist(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_fragmentLengthHist"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_fragmentLengthHist}{}}}
+\subsection{Method \code{read_fragmentLengthHist()}}{
+Read \code{fragment_length_hist.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_fragmentLengthHist(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/dragen_coverage_metrics_read.Rd b/man/dragen_coverage_metrics_read.Rd
new file mode 100644
index 0000000..a81e906
--- /dev/null
+++ b/man/dragen_coverage_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_coverage_metrics_read}
+\alias{dragen_coverage_metrics_read}
+\title{Read DRAGEN Coverage Metrics}
+\usage{
+dragen_coverage_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with data in list column.
+}
+\description{
+Reads the \code{coverage_metrics.csv} file generated by DRAGEN.
+}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index 1caa4ee..0054e64 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L789", {
+test_that("Function time_metrics_process() @ L879", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From c1e4aa3e90f241ca78c4bd5b029d9c45fb68c9f0 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Wed, 16 Oct 2024 02:20:44 +1100
Subject: [PATCH 05/17] dragen: more parsers

---
 NAMESPACE                                     |   1 +
 R/dragen.R                                    | 127 +++++++++++++++++-
 R/tso_dragen.R                                |  59 ++++----
 man/Wf_dragen.Rd                              |  24 +++-
 man/dragen_coverage_metrics_read.Rd           |   2 +-
 man/dragen_mapping_metrics_read.Rd            |  17 +++
 .../test-roxytest-testexamples-dragen.R       |   2 +-
 7 files changed, 201 insertions(+), 31 deletions(-)
 create mode 100644 man/dragen_mapping_metrics_read.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 859e3dc..5c6f744 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -37,6 +37,7 @@ export(dr_gds_download)
 export(dr_output_format_valid)
 export(dr_s3_download)
 export(dragen_coverage_metrics_read)
+export(dragen_mapping_metrics_read)
 export(dtw_Wf_tso_ctdna_tumor_only)
 export(dtw_Wf_tso_ctdna_tumor_only_v2)
 export(empty_tbl)
diff --git a/R/dragen.R b/R/dragen.R
index 95790b0..082ca1a 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1,9 +1,134 @@
+dragen_subprefix <- function(x, suffix) {
+  # L2401290.exon_contig_mean_cov.csv -> exon
+  # L2401290.target_bed_contig_mean_cov.csv -> target_bed
+  # L2401290.tmb_contig_mean_cov.csv -> tmb
+  # L2401290.wgs_contig_mean_cov.csv -> wgs
+  bname <- basename(x)
+  s1 <- tools::file_path_sans_ext(bname)
+  s2 <- sub(".*\\.(.*)", "\\1", s1)
+  sub(suffix, "", s2)
+}
+
+#' Read DRAGEN Mapping Metrics
+#'
+#' Reads the `mapping_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_mapping_metrics_read <- function(x) {
+  abbrev_nm <- c(
+    "Total input reads" = "reads_tot_input_dragen",
+    "Number of duplicate marked reads" = "reads_num_dupmarked_dragen",
+    "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed_dragen",
+    "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq_dragen",
+    "Reads with mate sequenced" = "reads_w_mate_seq_dragen",
+    "Reads without mate sequenced" = "reads_wo_mate_seq_dragen",
+    "QC-failed reads" = "reads_qcfail_dragen",
+    "Mapped reads" = "reads_mapped_dragen",
+    "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt_dragen",
+    "Mapped reads R1" = "reads_mapped_r1_dragen",
+    "Mapped reads R2" = "reads_mapped_r2_dragen",
+    "Number of unique & mapped reads (excl. duplicate marked reads)" = "reads_num_uniq_mapped_dragen",
+    "Unmapped reads" = "reads_unmapped_dragen",
+    "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt_dragen",
+    "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj_dragen",
+    "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton_dragen",
+    "Paired reads (itself & mate mapped)" = "reads_paired_dragen",
+    "Properly paired reads" = "reads_paired_proper_dragen",
+    "Not properly paired reads (discordant)" = "reads_discordant_dragen",
+    "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom_dragen",
+    "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10_dragen",
+    "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf_dragen",
+    "Reads with MAPQ [30:40)" = "reads_mapq_30_40_dragen",
+    "Reads with MAPQ [20:30)" = "reads_mapq_20_30_dragen",
+    "Reads with MAPQ [10:20)" = "reads_mapq_10_20_dragen",
+    "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10_dragen",
+    "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped_dragen",
+    "Reads with indel R1" = "reads_indel_r1_dragen",
+    "Reads with indel R2" = "reads_indel_r2_dragen",
+    "Total bases" = "bases_tot_dragen",
+    "Total bases R1" = "bases_tot_r1_dragen",
+    "Total bases R2" = "bases_tot_r2_dragen",
+    "Mapped bases" = "bases_mapped_dragen",
+    "Mapped bases R1" = "bases_mapped_r1_dragen",
+    "Mapped bases R2" = "bases_mapped_r2_dragen",
+    "Soft-clipped bases" = "bases_softclip_dragen",
+    "Soft-clipped bases R1" = "bases_softclip_r1_dragen",
+    "Soft-clipped bases R2" = "bases_softclip_r2_dragen",
+    "Hard-clipped bases" = "bases_hardclip_dragen",
+    "Hard-clipped bases R1" = "bases_hardclip_r1_dragen",
+    "Hard-clipped bases R2" = "bases_hardclip_r2_dragen",
+    "Mismatched bases R1" = "bases_mismatched_r1_dragen",
+    "Mismatched bases R2" = "bases_mismatched_r2_dragen",
+    "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels_dragen",
+    "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels_dragen",
+    "Q30 bases" = "bases_q30_dragen",
+    "Q30 bases R1" = "bases_q30_r1_dragen",
+    "Q30 bases R2" = "bases_q30_r2_dragen",
+    "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped_dragen",
+    "Total alignments" = "alignments_tot_dragen",
+    "Secondary alignments" = "alignments_secondary_dragen",
+    "Supplementary (chimeric) alignments" = "alignments_chimeric_dragen",
+    "Estimated read length" = "read_len_dragen",
+    "Bases in reference genome" = "bases_in_ref_genome_dragen",
+    "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct_dragen",
+    "Insert length: mean" = "insert_len_mean_dragen",
+    "Insert length: median" = "insert_len_median_dragen",
+    "Insert length: standard deviation" = "insert_len_std_dev_dragen",
+    "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided_dragen",
+    "Estimated sample contamination" = "contamination_est_dragen",
+    "Estimated sample contamination standard error" = "contamination_stderr_est_dragen",
+    "DRAGEN mapping rate [mil. reads/second]" = "mapping_rate_dragen_milreads_per_sec_dragen",
+    "Total reads in RG" = "reads_tot_rg_dragen",
+    "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl_dragen",
+    "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl_dragen",
+    "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl_dragen",
+    "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl_dragen",
+    "Reads mapping to multiple locations" = "reads_map_multiloc_dragen",
+    "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj_dragen",
+    "Reads with splice junction" = "reads_splicejunc_dragen",
+    "Average sequenced coverage over genome" = "cov_avg_seq_over_genome_dragen",
+    "Filtered rRNA reads" = "reads_rrna_filtered_dragen"
+  )
+  raw <- readr::read_lines(x)
+  assertthat::assert_that(grepl("MAPPING/ALIGNING", raw[1]))
+  # split by RG and non-RG
+  # tidy
+  d <- raw |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim(
+      "value",
+      names = c("category", "RG", "var", "count", "pct"),
+      delim = ",", too_few = "align_start"
+    ) |>
+    dplyr::mutate(
+      count = dplyr::na_if(.data$count, "NA"),
+      count = as.numeric(.data$count),
+      pct = as.numeric(.data$pct),
+      var = dplyr::recode(.data$var, !!!abbrev_nm),
+      RG = dplyr::if_else(.data$RG == "", "Total", .data$RG)
+    ) |>
+    dplyr::select("RG", "var", "count", "pct")
+  # pivot
+  d |>
+    tidyr::pivot_longer(c("count", "pct")) |>
+    dplyr::mutate(
+      name = dplyr::if_else(.data$name == "count", "", "_pct"),
+      var = glue("{.data$var}{.data$name}")
+    ) |>
+    dplyr::select("RG", "var", "value") |>
+    dplyr::filter(!is.na(.data$value)) |>
+    tidyr::pivot_wider(names_from = "var", values_from = "value")
+}
+
 #' Read DRAGEN Coverage Metrics
 #'
 #' Reads the `coverage_metrics.csv` file generated by DRAGEN.
 #' @param x Path to file.
 #'
-#' @return Tibble with data in list column.
+#' @return Tibble with metrics.
 #' @export
 dragen_coverage_metrics_read <- function(x) {
   # all rows except 'Aligned bases' and 'Aligned reads' refer to the region
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index e37bd53..80d940d 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -15,9 +15,9 @@
 #' )
 #' d1 <- Wf_dragen$new(path = p, prefix = prefix)
 #' d1$list_files(max_files = 100)
-#' t1$list_files_filter_relevant(max_files = 300)
-#' d <- t1$download_files(max_files = 100, dryrun = F)
-#' d_tidy <- t1$tidy_files(d)
+#' d1$list_files_filter_relevant(max_files = 300)
+#' d <- d1$download_files(max_files = 100, dryrun = F)
+#' d_tidy <- d1$tidy_files(d)
 #' d_write <- t1$write(
 #'   d_tidy,
 #'   outdir = file.path(p, "dracarys_tidy"),
@@ -44,37 +44,37 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\-replay\\.json$"), "replay",
         glue("{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.exon_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{pref}\\.exon_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.exon_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.wgs_contig_mean_cov\\.csv$"), "contigMeanCov",
+        glue("{pref}\\.exon_coverage_metrics\\.csv$"), "coverageMetrics",
+        glue("{pref}\\.target_bed_coverage_metrics\\.csv$"), "coverageMetrics",
+        glue("{pref}\\.tmb_coverage_metrics\\.csv$"), "coverageMetrics",
+        glue("{pref}\\.wgs_coverage_metrics\\.csv$"), "coverageMetrics",
+        glue("{pref}\\.exon_fine_hist\\.csv$"), "fineHist",
+        glue("{pref}\\.target_bed_fine_hist\\.csv$"), "fineHist",
+        glue("{pref}\\.tmb_fine_hist\\.csv$"), "fineHist",
+        glue("{pref}\\.wgs_fine_hist\\.csv$"), "fineHist",
         glue("{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.fragment_length_hist\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.fragment_length_hist\\.csv$"), "fragmentLengthHist",
         glue("{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{pref}\\.target_bed_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.target_bed_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{pref}\\.tmb_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.tmb_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.wgs_contig_mean_cov\\.csv$"), "contigMeanCov",
-        glue("{pref}\\.wgs_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.wgs_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY"
+        glue("{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY"
       )
       regexes <- reg1 |>
         dplyr::mutate(
@@ -121,6 +121,7 @@ Wf_dragen <- R6::R6Class(
     #' @param x Path to file.
     #' @param keep_alt Keep ALT contigs.
     read_contigMeanCov = function(x, keep_alt = FALSE) {
+      subprefix <- dragen_subprefix(x, "_contig_mean_cov")
       dat <- readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
         dplyr::filter(
           if (!keep_alt) {
@@ -129,17 +130,19 @@ Wf_dragen <- R6::R6Class(
             TRUE
           }
         )
-      tibble::tibble(name = "contigmeancov", data = list(dat[]))
+      tibble::tibble(name = glue("contigmeancov_{subprefix}"), data = list(dat[]))
     },
     #' @description Read `coverage_metrics.csv` file.
     #' @param x Path to file.
     read_coverageMetrics = function(x) {
+      subprefix <- dragen_subprefix(x, "_coverage_metrics")
       dat <- dragen_coverage_metrics_read(x)
-      tibble::tibble(name = "covmetrics", data = list(dat))
+      tibble::tibble(name = glue("covmetrics_{subprefix}"), data = list(dat))
     },
     #' @description Read `fine_hist.csv` file.
     #' @param x Path to file.
     read_fineHist = function(x) {
+      subprefix <- dragen_subprefix(x, "_fine_hist")
       d <- readr::read_csv(x, col_types = "cd")
       assertthat::assert_that(all(colnames(d) == c("Depth", "Overall")))
       # there's a max Depth of 2000+, so convert to numeric for easier plotting
@@ -149,7 +152,7 @@ Wf_dragen <- R6::R6Class(
           Depth = as.integer(.data$Depth)
         ) |>
         dplyr::select(depth = "Depth", n_loci = "Overall")
-      tibble::tibble(name = "finehist", data = list(dat))
+      tibble::tibble(name = glue("finehist_{subprefix}"), data = list(dat))
     },
     #' @description Read `fragment_length_hist.csv` file.
     #' @param x Path to file.
@@ -166,6 +169,12 @@ Wf_dragen <- R6::R6Class(
         ) |>
         dplyr::select("fragmentLength", "count")
       tibble::tibble(name = "fraglen", data = list(dat))
+    },
+    #' @description Read `mapping_metrics.csv` file.
+    #' @param x Path to file.
+    read_mappingMetrics = function(x) {
+      dat <- dragen_mapping_metrics_read(x)
+      tibble::tibble(name = "mapmetrics", data = list(dat))
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 3402faf..e023ca1 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -18,9 +18,9 @@ p <- file.path(
 )
 d1 <- Wf_dragen$new(path = p, prefix = prefix)
 d1$list_files(max_files = 100)
-t1$list_files_filter_relevant(max_files = 300)
-d <- t1$download_files(max_files = 100, dryrun = F)
-d_tidy <- t1$tidy_files(d)
+d1$list_files_filter_relevant(max_files = 300)
+d <- d1$download_files(max_files = 100, dryrun = F)
+d_tidy <- d1$tidy_files(d)
 d_write <- t1$write(
   d_tidy,
   outdir = file.path(p, "dracarys_tidy"),
@@ -49,6 +49,7 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_coverageMetrics}{\code{Wf_dragen$read_coverageMetrics()}}
 \item \href{#method-Wf_dragen-read_fineHist}{\code{Wf_dragen$read_fineHist()}}
 \item \href{#method-Wf_dragen-read_fragmentLengthHist}{\code{Wf_dragen$read_fragmentLengthHist()}}
+\item \href{#method-Wf_dragen-read_mappingMetrics}{\code{Wf_dragen$read_mappingMetrics()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -179,6 +180,23 @@ Read \code{fragment_length_hist.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_fragmentLengthHist(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_mappingMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_mappingMetrics}{}}}
+\subsection{Method \code{read_mappingMetrics()}}{
+Read \code{mapping_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_mappingMetrics(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/dragen_coverage_metrics_read.Rd b/man/dragen_coverage_metrics_read.Rd
index a81e906..96fd3ef 100644
--- a/man/dragen_coverage_metrics_read.Rd
+++ b/man/dragen_coverage_metrics_read.Rd
@@ -10,7 +10,7 @@ dragen_coverage_metrics_read(x)
 \item{x}{Path to file.}
 }
 \value{
-Tibble with data in list column.
+Tibble with metrics.
 }
 \description{
 Reads the \code{coverage_metrics.csv} file generated by DRAGEN.
diff --git a/man/dragen_mapping_metrics_read.Rd b/man/dragen_mapping_metrics_read.Rd
new file mode 100644
index 0000000..223f0a7
--- /dev/null
+++ b/man/dragen_mapping_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_mapping_metrics_read}
+\alias{dragen_mapping_metrics_read}
+\title{Read DRAGEN Mapping Metrics}
+\usage{
+dragen_mapping_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{mapping_metrics.csv} file output from DRAGEN.
+}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index 0054e64..1bc8cea 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L879", {
+test_that("Function time_metrics_process() @ L1004", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From 44be1a2b2cf1c6472f444cb8a3633a823f3191c7 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Wed, 16 Oct 2024 11:16:16 +1100
Subject: [PATCH 06/17] dragen: add hist cumsum parser

---
 R/tso_dragen.R | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index 80d940d..4785123 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -55,14 +55,10 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.target_bed_fine_hist\\.csv$"), "fineHist",
         glue("{pref}\\.tmb_fine_hist\\.csv$"), "fineHist",
         glue("{pref}\\.wgs_fine_hist\\.csv$"), "fineHist",
-        glue("{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.exon_hist\\.csv$"), "hist",
+        glue("{pref}\\.target_bed_hist\\.csv$"), "hist",
+        glue("{pref}\\.tmb_hist\\.csv$"), "hist",
+        glue("{pref}\\.wgs_hist\\.csv$"), "hist",
         glue("{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.fragment_length_hist\\.csv$"), "fragmentLengthHist",
         glue("{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
@@ -175,6 +171,26 @@ Wf_dragen <- R6::R6Class(
     read_mappingMetrics = function(x) {
       dat <- dragen_mapping_metrics_read(x)
       tibble::tibble(name = "mapmetrics", data = list(dat))
+    },
+    #' @description Read `hist.csv` file.
+    read_hist = function(x) {
+      subprefix <- dragen_subprefix(x, "_hist")
+      d <- readr::read_csv(x, col_names = c("var", "pct"), col_types = "cd")
+      dat <- d |>
+        dplyr::mutate(
+          var = sub("PCT of bases in .* with coverage ", "", .data$var),
+          var = gsub("\\[|\\]|\\(|\\)", "", .data$var),
+          var = gsub("x", "", .data$var),
+          var = gsub("inf", "Inf", .data$var)
+        ) |>
+        tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
+        dplyr::mutate(
+          start = as.numeric(.data$start),
+          end = as.numeric(.data$end),
+          pct = round(.data$pct, 2),
+          cumsum = cumsum(.data$pct)
+        )
+      tibble::tibble(name = glue("hist_{subprefix}"), data = list(dat))
     }
   ) # end public
 ) # end Wf_dragen

From e5ffa3b0a7e62f2054fe3539cd4c5ee86f1ef99c Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Thu, 17 Oct 2024 00:51:55 +1100
Subject: [PATCH 07/17] dragen: parse vc/time metrics

---
 R/dragen.R                                    | 93 +++++++++++++++++++
 R/tso_dragen.R                                | 37 +++++++-
 man/Wf_dragen.Rd                              | 54 +++++++++++
 .../test-roxytest-testexamples-dragen.R       |  2 +-
 4 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/R/dragen.R b/R/dragen.R
index 082ca1a..9f66022 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -9,6 +9,99 @@ dragen_subprefix <- function(x, suffix) {
   sub(suffix, "", s2)
 }
 
+dragen_vc_metrics_read <- function(x) {
+  abbrev_nm1 <- tibble::tribble(
+    ~raw, ~clean, ~region,
+    "Total", "var_tot_dragen", FALSE,
+    "Biallelic", "var_biallelic_dragen", FALSE,
+    "Multiallelic", "var_multiallelic_dragen", FALSE,
+    "SNPs", "var_snp_dragen", FALSE,
+    "Insertions (Hom)", "var_ins_hom_dragen", FALSE,
+    "Insertions (Het)", "var_ins_het_dragen", FALSE,
+    "Deletions (Hom)", "var_del_hom_dragen", FALSE,
+    "Deletions (Het)", "var_del_het_dragen", FALSE,
+    "Indels (Het)", "var_indel_het_dragen", FALSE,
+    "Chr X number of SNPs over ", "var_snp_x_over_", TRUE,
+    "Chr Y number of SNPs over ", "var_snp_y_over_", TRUE,
+    "(Chr X SNPs)/(chr Y SNPs) ratio over ", "var_x_over_y_snp_ratio_over_", TRUE,
+    "SNP Transitions", "var_snp_transitions_dragen", FALSE,
+    "SNP Transversions", "var_snp_transversions_dragen", FALSE,
+    "Ti/Tv ratio", "var_ti_tv_ratio_dragen", FALSE,
+    "Heterozygous", "var_heterozygous_dragen", FALSE,
+    "Homozygous", "var_homozygous_dragen", FALSE,
+    "Het/Hom ratio", "var_het_hom_ratio_dragen", FALSE,
+    "In dbSNP", "var_in_dbsnp_dragen", FALSE,
+    "Not in dbSNP", "var_nin_dbsnp_dragen", FALSE,
+    "Percent Callability", "callability_pct_dragen", FALSE,
+    "Percent Autosome Callability", "callability_auto_pct_dragen", FALSE,
+    "Number of samples", "sample_num_dragen", FALSE,
+    "Reads Processed", "reads_processed_dragen", FALSE,
+    "Child Sample", "sample_child_dragen", FALSE
+  )
+  raw <- readr::read_lines(x)
+  assertthat::assert_that(grepl("VARIANT CALLER", raw[1]))
+  # first detect if this is genome or target region
+  res <- raw |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim(
+      "value",
+      names = c("category", "sample", "var", "count", "pct"),
+      delim = ",", too_few = "align_start"
+    )
+  reg1 <- NULL
+  str1 <- NULL
+  tmp <- res |>
+    dplyr::filter(grepl("Chr X number of SNPs over ", .data$var)) |>
+    dplyr::slice_head(n = 1) |>
+    dplyr::pull("var")
+  assertthat::assert_that(length(tmp) == 1)
+  if (grepl("genome", tmp)) {
+    str1 <- "genome"
+    reg1 <- "genome"
+  } else if (grepl("QC coverage region", tmp)) {
+    str1 <- "QC coverage region"
+    reg1 <- "qccovreg"
+  } else if (grepl("target region", tmp)) {
+    str1 <- "target region"
+    reg1 <- "targetreg"
+  } else {
+    cli::cli_abort("Cannot determine the varcall region from: {x}")
+  }
+  abbrev_nm <- abbrev_nm1 |>
+    dplyr::mutate(
+      raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw),
+      clean = ifelse(.data$region, glue("{.data$clean}{reg1}_dragen"), .data$clean)
+    ) |>
+    dplyr::select("raw", "clean") |>
+    tibble::deframe()
+
+  d <- res |>
+    dplyr::mutate(
+      var = dplyr::recode(.data$var, !!!abbrev_nm),
+      count = dplyr::na_if(.data$count, "NA"),
+      count = as.numeric(.data$count),
+      pct = round(as.numeric(.data$pct), 2),
+      category = dplyr::case_when(
+        grepl("SUMMARY", .data$category) ~ "summary",
+        grepl("PREFILTER", .data$category) ~ "prefilter",
+        grepl("POSTFILTER", .data$category) ~ "postfilter",
+        TRUE ~ "unknown"
+      )
+    ) |>
+    dplyr::filter(.data$category != "summary") |>
+    dplyr::select("category", "sample", "var", "count", "pct")
+  # pivot
+  d |>
+    tidyr::pivot_longer(c("count", "pct")) |>
+    dplyr::mutate(
+      name = dplyr::if_else(.data$name == "count", "", "_pct"),
+      var = glue("{.data$var}{.data$name}")
+    ) |>
+    dplyr::select("category", "sample", "var", "value") |>
+    dplyr::filter(!is.na(.data$value)) |>
+    tidyr::pivot_wider(names_from = "var", values_from = "value")
+}
+
 #' Read DRAGEN Mapping Metrics
 #'
 #' Reads the `mapping_metrics.csv` file output from DRAGEN.
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index 4785123..22d5768 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -62,15 +62,15 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.fragment_length_hist\\.csv$"), "fragmentLengthHist",
         glue("{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.gvcf_metrics\\.csv$"), "vcMetrics",
+        glue("{pref}\\.mapping_metrics\\.csv$"), "mappingMetrics",
         glue("{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.time_metrics\\.csv$"), "timeMetrics",
         glue("{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY"
+        glue("{pref}\\.vc_metrics\\.csv$"), "vcMetrics"
       )
       regexes <- reg1 |>
         dplyr::mutate(
@@ -172,7 +172,8 @@ Wf_dragen <- R6::R6Class(
       dat <- dragen_mapping_metrics_read(x)
       tibble::tibble(name = "mapmetrics", data = list(dat))
     },
-    #' @description Read `hist.csv` file.
+    #' @description Read `hist.csv` (not `fine_hist.csv`!) file.
+    #' @param x Path to file.
     read_hist = function(x) {
       subprefix <- dragen_subprefix(x, "_hist")
       d <- readr::read_csv(x, col_names = c("var", "pct"), col_types = "cd")
@@ -191,6 +192,32 @@ Wf_dragen <- R6::R6Class(
           cumsum = cumsum(.data$pct)
         )
       tibble::tibble(name = glue("hist_{subprefix}"), data = list(dat))
+    },
+    #' @description Read `time_metrics.csv` file.
+    #' @param x Path to file.
+    read_timeMetrics = function(x) {
+      cn <- c("dummy1", "dummy2", "Step", "time_hrs", "time_sec")
+      ct <- readr::cols(.default = "c", time_hrs = readr::col_time(format = "%T"), time_sec = "d")
+      d <- readr::read_csv(x, col_names = cn, col_types = ct)
+      assertthat::assert_that(d$dummy1[1] == "RUN TIME", is.na(d$dummy2[1]))
+      assertthat::assert_that(inherits(d$time_hrs, "hms"))
+      dat <- d |>
+        dplyr::mutate(
+          Step = tools::toTitleCase(sub("Time ", "", .data$Step)),
+          Step = gsub(" |/", "", .data$Step),
+          Time = substr(.data$time_hrs, 1, 5)
+        ) |>
+        dplyr::select("Step", "Time") |>
+        tidyr::pivot_wider(names_from = "Step", values_from = "Time") |>
+        dplyr::relocate("TotalRuntime")
+      tibble::tibble(name = "timemetrics", data = list(dat))
+    },
+    #' @description Read `vc_metrics.csv`/`gvcf_metrics.csv` file.
+    #' @param x Path to file.
+    read_vcMetrics = function(x) {
+      subprefix <- dragen_subprefix(x, "_metrics")
+      dat <- dragen_vc_metrics_read(x)
+      tibble::tibble(name = glue("vcmetrics_{subprefix}"), data = list(dat[]))
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index e023ca1..57ff416 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -50,6 +50,9 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_fineHist}{\code{Wf_dragen$read_fineHist()}}
 \item \href{#method-Wf_dragen-read_fragmentLengthHist}{\code{Wf_dragen$read_fragmentLengthHist()}}
 \item \href{#method-Wf_dragen-read_mappingMetrics}{\code{Wf_dragen$read_mappingMetrics()}}
+\item \href{#method-Wf_dragen-read_hist}{\code{Wf_dragen$read_hist()}}
+\item \href{#method-Wf_dragen-read_timeMetrics}{\code{Wf_dragen$read_timeMetrics()}}
+\item \href{#method-Wf_dragen-read_vcMetrics}{\code{Wf_dragen$read_vcMetrics()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -197,6 +200,57 @@ Read \code{mapping_metrics.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_mappingMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_hist"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_hist}{}}}
+\subsection{Method \code{read_hist()}}{
+Read \code{hist.csv} (not \code{fine_hist.csv}!) file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_hist(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_timeMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_timeMetrics}{}}}
+\subsection{Method \code{read_timeMetrics()}}{
+Read \code{time_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_timeMetrics(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_vcMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_vcMetrics}{}}}
+\subsection{Method \code{read_vcMetrics()}}{
+Read \code{vc_metrics.csv}/\code{gvcf_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_vcMetrics(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index 1bc8cea..e220df4 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L1004", {
+test_that("Function time_metrics_process() @ L1097", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From eb27203547345c70deb2a50c29a6b10a8c438761 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Thu, 17 Oct 2024 01:30:58 +1100
Subject: [PATCH 08/17] dragen: parse trimmer/sv metrics

---
 NAMESPACE                                     |  3 +
 R/dragen.R                                    | 96 +++++++++++++++++++
 R/tso_dragen.R                                | 16 +++-
 man/Wf_dragen.Rd                              | 36 +++++++
 man/dragen_sv_metrics_read.Rd                 | 17 ++++
 man/dragen_trimmer_metrics_read.Rd            | 17 ++++
 man/dragen_vc_metrics_read.Rd                 | 17 ++++
 .../test-roxytest-testexamples-dragen.R       |  2 +-
 8 files changed, 201 insertions(+), 3 deletions(-)
 create mode 100644 man/dragen_sv_metrics_read.Rd
 create mode 100644 man/dragen_trimmer_metrics_read.Rd
 create mode 100644 man/dragen_vc_metrics_read.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 5c6f744..dcd7516 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -38,6 +38,9 @@ export(dr_output_format_valid)
 export(dr_s3_download)
 export(dragen_coverage_metrics_read)
 export(dragen_mapping_metrics_read)
+export(dragen_sv_metrics_read)
+export(dragen_trimmer_metrics_read)
+export(dragen_vc_metrics_read)
 export(dtw_Wf_tso_ctdna_tumor_only)
 export(dtw_Wf_tso_ctdna_tumor_only_v2)
 export(empty_tbl)
diff --git a/R/dragen.R b/R/dragen.R
index 9f66022..21ca6d7 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -9,6 +9,102 @@ dragen_subprefix <- function(x, suffix) {
   sub(suffix, "", s2)
 }
 
+#' Read DRAGEN SV Metrics
+#'
+#' Reads the `sv_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_sv_metrics_read <- function(x) {
+  d <- readr::read_lines(x)
+  assertthat::assert_that(grepl("SV SUMMARY", d[1]))
+  abbrev_nm <- c(
+    "Number of deletions (PASS)" = "del",
+    "Number of insertions (PASS)" = "ins",
+    "Number of duplications (PASS)" = "dup",
+    "Number of breakend pairs (PASS)" = "bnd"
+  )
+  d |>
+    tibble::as_tibble_col(column_name = "value") |>
+    dplyr::filter(!grepl("Total number of structural variants", .data$value)) |>
+    tidyr::separate_wider_delim(
+      "value",
+      names = c("svsum", "sample", "var", "count", "pct"), delim = ",",
+      too_few = "align_start"
+    ) |>
+    dplyr::mutate(
+      count = as.numeric(.data$count),
+      var = dplyr::recode(.data$var, !!!abbrev_nm)
+    ) |>
+    dplyr::select("var", "count") |>
+    tidyr::pivot_wider(names_from = "var", values_from = "count")
+}
+
+#' Read DRAGEN Trimmer Metrics
+#'
+#' Reads the `trimmer_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_trimmer_metrics_read <- function(x) {
+  d <- readr::read_lines(x)
+  assertthat::assert_that(grepl("TRIMMER STATISTICS", d[1]))
+  abbrev_nm <- c(
+    "Total input reads"                              = "reads_tot_input_dragen",
+    "Total input bases"                              = "bases_tot_dragen",
+    "Total input bases R1"                           = "bases_r1_dragen",
+    "Total input bases R2"                           = "bases_r2_dragen",
+    "Average input read length"                      = "read_len_avg_dragen",
+    "Total trimmed reads"                            = "reads_trimmed_tot_dragen",
+    "Total trimmed bases"                            = "bases_trimmed_tot_dragen",
+    "Average bases trimmed per read"                 = "bases_trimmed_avg_per_read_dragen",
+    "Average bases trimmed per trimmed read"         = "bases_trimmed_avg_per_trimmedread_dragen",
+    "Remaining poly-G K-mers R1 3prime"              = "polygkmers3r1_remaining_dragen",
+    "Remaining poly-G K-mers R2 3prime"              = "polygkmers3r2_remaining_dragen",
+    "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1_dragen",
+    "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2_dragen",
+    "Poly-G soft trimmed reads filtered R1 3prime"   = "polyg_soft_trimmed_reads_filt_3r1_dragen",
+    "Poly-G soft trimmed reads filtered R2 3prime"   = "polyg_soft_trimmed_reads_filt_3r2_dragen",
+    "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1_dragen",
+    "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2_dragen",
+    "Poly-G soft trimmed bases filtered R1 3prime"   = "polyg_soft_trimmed_bases_filt_3r1_dragen",
+    "Poly-G soft trimmed bases filtered R2 3prime"   = "polyg_soft_trimmed_bases_filt_3r2_dragen",
+    "Total filtered reads"                           = "reads_tot_filt_dragen",
+    "Reads filtered for minimum read length R1"      = "reads_filt_minreadlenr1_dragen",
+    "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2_dragen"
+  )
+
+  d |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim("value", names = c("category", "extra", "var", "count", "pct"), delim = ",", too_few = "align_start") |>
+    dplyr::mutate(
+      count = as.numeric(.data$count),
+      pct = round(as.numeric(.data$pct), 2),
+      var = dplyr::recode(.data$var, !!!abbrev_nm)
+    ) |>
+    dplyr::select("var", "count", "pct") |>
+    tidyr::pivot_longer(c("count", "pct")) |>
+    dplyr::filter(!is.na(.data$value)) |>
+    dplyr::mutate(
+      name = dplyr::if_else(.data$name == "count", "", "_pct"),
+      var = glue("{.data$var}{.data$name}")
+    ) |>
+    dplyr::select("var", "value") |>
+    tidyr::pivot_wider(names_from = "var", values_from = "value")
+}
+
+#' Read DRAGEN VariantCall Metrics
+#'
+#' Reads the `vc_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
 dragen_vc_metrics_read <- function(x) {
   abbrev_nm1 <- tibble::tribble(
     ~raw, ~clean, ~region,
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index 22d5768..8964779 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -66,9 +66,9 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.mapping_metrics\\.csv$"), "mappingMetrics",
         glue("{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.sv_metrics\\.csv$"), "svMetrics",
         glue("{pref}\\.time_metrics\\.csv$"), "timeMetrics",
-        glue("{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.trimmer_metrics\\.csv$"), "trimmerMetrics",
         glue("{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.vc_metrics\\.csv$"), "vcMetrics"
       )
@@ -218,6 +218,18 @@ Wf_dragen <- R6::R6Class(
       subprefix <- dragen_subprefix(x, "_metrics")
       dat <- dragen_vc_metrics_read(x)
       tibble::tibble(name = glue("vcmetrics_{subprefix}"), data = list(dat[]))
+    },
+    #' @description Read `trimmer_metrics.csv` file.
+    #' @param x Path to file.
+    read_trimmerMetrics = function(x) {
+      dat <- dragen_trimmer_metrics_read(x)
+      tibble::tibble(name = "trimmermetrics", data = list(dat[]))
+    },
+    #' @description Read `sv_metrics.csv` file.
+    #' @param x Path to file.
+    read_svMetrics = function(x) {
+      dat <- dragen_sv_metrics_read(x)
+      tibble::tibble(name = "svmetrics", data = list(dat[]))
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 57ff416..5745e87 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -53,6 +53,8 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_hist}{\code{Wf_dragen$read_hist()}}
 \item \href{#method-Wf_dragen-read_timeMetrics}{\code{Wf_dragen$read_timeMetrics()}}
 \item \href{#method-Wf_dragen-read_vcMetrics}{\code{Wf_dragen$read_vcMetrics()}}
+\item \href{#method-Wf_dragen-read_trimmerMetrics}{\code{Wf_dragen$read_trimmerMetrics()}}
+\item \href{#method-Wf_dragen-read_svMetrics}{\code{Wf_dragen$read_svMetrics()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -251,6 +253,40 @@ Read \code{vc_metrics.csv}/\code{gvcf_metrics.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_vcMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_trimmerMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_trimmerMetrics}{}}}
+\subsection{Method \code{read_trimmerMetrics()}}{
+Read \code{trimmer_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_trimmerMetrics(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_svMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_svMetrics}{}}}
+\subsection{Method \code{read_svMetrics()}}{
+Read \code{sv_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_svMetrics(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/dragen_sv_metrics_read.Rd b/man/dragen_sv_metrics_read.Rd
new file mode 100644
index 0000000..50577e0
--- /dev/null
+++ b/man/dragen_sv_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_sv_metrics_read}
+\alias{dragen_sv_metrics_read}
+\title{Read DRAGEN SV Metrics}
+\usage{
+dragen_sv_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{sv_metrics.csv} file output from DRAGEN.
+}
diff --git a/man/dragen_trimmer_metrics_read.Rd b/man/dragen_trimmer_metrics_read.Rd
new file mode 100644
index 0000000..806a86f
--- /dev/null
+++ b/man/dragen_trimmer_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_trimmer_metrics_read}
+\alias{dragen_trimmer_metrics_read}
+\title{Read DRAGEN Trimmer Metrics}
+\usage{
+dragen_trimmer_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{trimmer_metrics.csv} file output from DRAGEN.
+}
diff --git a/man/dragen_vc_metrics_read.Rd b/man/dragen_vc_metrics_read.Rd
new file mode 100644
index 0000000..9bd4dae
--- /dev/null
+++ b/man/dragen_vc_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_vc_metrics_read}
+\alias{dragen_vc_metrics_read}
+\title{Read DRAGEN VariantCall Metrics}
+\usage{
+dragen_vc_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{vc_metrics.csv} file output from DRAGEN.
+}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index e220df4..52fdda4 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L1097", {
+test_that("Function time_metrics_process() @ L1193", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From 84c3c2c8703cace31d0bab638466752c38c68bd4 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Thu, 17 Oct 2024 10:08:48 +1100
Subject: [PATCH 09/17] dragen: parse fastqc/cnv metrics

---
 NAMESPACE                                     |  2 +
 R/dragen.R                                    | 80 ++++++++++++++++---
 R/dragen_fastqc.R                             | 24 +++++-
 R/tso_dragen.R                                | 16 +++-
 man/Wf_dragen.Rd                              | 36 +++++++++
 man/dragen_cnv_metrics_read.Rd                | 17 ++++
 man/dragen_fastqc_metrics_read.Rd             | 17 ++++
 .../test-roxytest-testexamples-dragen.R       |  2 +-
 8 files changed, 179 insertions(+), 15 deletions(-)
 create mode 100644 man/dragen_cnv_metrics_read.Rd
 create mode 100644 man/dragen_fastqc_metrics_read.Rd

diff --git a/NAMESPACE b/NAMESPACE
index dcd7516..a496196 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -36,7 +36,9 @@ export(dr_func_eval)
 export(dr_gds_download)
 export(dr_output_format_valid)
 export(dr_s3_download)
+export(dragen_cnv_metrics_read)
 export(dragen_coverage_metrics_read)
+export(dragen_fastqc_metrics_read)
 export(dragen_mapping_metrics_read)
 export(dragen_sv_metrics_read)
 export(dragen_trimmer_metrics_read)
diff --git a/R/dragen.R b/R/dragen.R
index 21ca6d7..bde01f9 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1,12 +1,63 @@
-dragen_subprefix <- function(x, suffix) {
-  # L2401290.exon_contig_mean_cov.csv -> exon
-  # L2401290.target_bed_contig_mean_cov.csv -> target_bed
-  # L2401290.tmb_contig_mean_cov.csv -> tmb
-  # L2401290.wgs_contig_mean_cov.csv -> wgs
-  bname <- basename(x)
-  s1 <- tools::file_path_sans_ext(bname)
-  s2 <- sub(".*\\.(.*)", "\\1", s1)
-  sub(suffix, "", s2)
+#' Read DRAGEN CNV Metrics
+#'
+#' Reads the `cnv_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_cnv_metrics_read <- function(x) {
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("SEX GENOTYPER", d0[1]))
+  abbrev_nm <- c(
+    "Bases in reference genome" = "bases_in_ref_genome_dragen",
+    "Average alignment coverage over genome" = "cov_alignment_avg_over_genome_dragen",
+    "Number of alignment records" = "n_alignment_records",
+    "Number of filtered records (total)" = "n_filtered_records_tot",
+    "Number of filtered records (duplicates)" = "n_filtered_records_dup",
+    "Number of filtered records (MAPQ)" = "n_filtered_records_mapq",
+    "Number of filtered records (unmapped)" = "n_filtered_records_unmap",
+    "Coverage MAD" = "coverage_mad",
+    "Gene Scaled MAD" = "gene_scaled_mad",
+    "Median Bin Count" = "median_bin_count",
+    "Number of target intervals" = "n_target_intervals",
+    "Number of normal samples" = "n_samp_norm",
+    "Number of segments" = "n_seg",
+    "Number of amplifications" = "n_amp",
+    "Number of deletions" = "n_del",
+    "Number of passing amplifications" = "n_amp_pass",
+    "Number of passing deletions" = "n_del_pass"
+  )
+
+  d1 <- d0 |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim(
+      "value",
+      names = c("category", "extra", "var", "count", "pct"),
+      delim = ",", too_few = "align_start"
+    )
+  sexgt <- d1 |>
+    dplyr::filter(.data$category == "SEX GENOTYPER") |>
+    dplyr::select(sexgt = "count", sexgt_pct = "pct")
+
+  d2 <- d1 |>
+    dplyr::filter(!.data$category == "SEX GENOTYPER") |>
+    dplyr::mutate(
+      count = as.numeric(.data$count),
+      pct = round(as.numeric(.data$pct), 2),
+      var = dplyr::recode(.data$var, !!!abbrev_nm)
+    ) |>
+    dplyr::select("var", "count", "pct") |>
+    tidyr::pivot_longer(c("count", "pct")) |>
+    dplyr::filter(!is.na(.data$value)) |>
+    dplyr::mutate(
+      name = dplyr::if_else(.data$name == "count", "", "_pct"),
+      var = glue("{.data$var}{.data$name}")
+    ) |>
+    dplyr::select("var", "value") |>
+    tidyr::pivot_wider(names_from = "var", values_from = "value")
+  res <- dplyr::bind_cols(sexgt, d2)
+  return(res)
 }
 
 #' Read DRAGEN SV Metrics
@@ -1514,3 +1565,14 @@ WgsHistFile <- R6::R6Class(
     }
   )
 )
+
+dragen_subprefix <- function(x, suffix) {
+  # L2401290.exon_contig_mean_cov.csv -> exon
+  # L2401290.target_bed_contig_mean_cov.csv -> target_bed
+  # L2401290.tmb_contig_mean_cov.csv -> tmb
+  # L2401290.wgs_contig_mean_cov.csv -> wgs
+  bname <- basename(x)
+  s1 <- tools::file_path_sans_ext(bname)
+  s2 <- sub(".*\\.(.*)", "\\1", s1)
+  sub(suffix, "", s2)
+}
diff --git a/R/dragen_fastqc.R b/R/dragen_fastqc.R
index c272e20..3615cd6 100644
--- a/R/dragen_fastqc.R
+++ b/R/dragen_fastqc.R
@@ -22,7 +22,7 @@ FastqcMetricsFile <- R6::R6Class(
     #' @return tibble. TODO.
     read = function() {
       x <- self$path
-      res <- read_fastqc_metrics(x)
+      res <- dragen_fastqc_metrics_read(x)
     },
     #' @description
     #' Writes a tidy version of the `fastqc_metrics.csv` file output
@@ -53,7 +53,15 @@ FastqcMetricsFile <- R6::R6Class(
   )
 )
 
-read_fastqc_metrics <- function(x) {
+#' DRAGEN FASTQC Metrics
+#'
+#' Read DRAGEN `fastqc_metrics.csv` file.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_fastqc_metrics_read <- function(x) {
   # 'SEQUENCE POSITIONS' has an extra field for 'Total Sequence Starts' which
   # can be filtered out since that can be computed by the rest of that section.
   raw <- readr::read_lines(x) |>
@@ -70,6 +78,15 @@ read_fastqc_metrics <- function(x) {
       value = as.numeric(.data$value)
     )
 
+  # 1 POSITIONAL BASE CONTENT
+  # 2 POSITIONAL BASE MEAN QUALITY
+  # 3 POSITIONAL QUALITY
+  # 4 READ GC CONTENT
+  # 5 READ GC CONTENT QUALITY
+  # 6 READ LENGTHS
+  # 7 READ MEAN QUALITY
+  # 8 SEQUENCE POSITIONS
+
   # there are binned pos e.g. "149-150"
   # the value is an accumulation, so divide by number of grains
   pos_base_cont <- d |>
@@ -166,5 +183,6 @@ read_fastqc_metrics <- function(x) {
     read_lengths = read_len,
     read_mean_quality = read_mean_qual,
     sequence_positions = seq_pos
-  )
+  ) |>
+    tibble::enframe(name = "fastqc_name", value = "fastqc_value")
 }
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index 8964779..7030263 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -42,7 +42,7 @@ Wf_dragen <- R6::R6Class(
       reg1 <- tibble::tribble(
         ~regex, ~fun,
         glue("{pref}\\-replay\\.json$"), "replay",
-        glue("{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.cnv_metrics.csv$"), "cnvMetrics",
         glue("{pref}\\.exon_contig_mean_cov\\.csv$"), "contigMeanCov",
         glue("{pref}\\.target_bed_contig_mean_cov\\.csv$"), "contigMeanCov",
         glue("{pref}\\.tmb_contig_mean_cov\\.csv$"), "contigMeanCov",
@@ -59,7 +59,7 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.target_bed_hist\\.csv$"), "hist",
         glue("{pref}\\.tmb_hist\\.csv$"), "hist",
         glue("{pref}\\.wgs_hist\\.csv$"), "hist",
-        glue("{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.fastqc_metrics\\.csv$"), "fastqcMetrics",
         glue("{pref}\\.fragment_length_hist\\.csv$"), "fragmentLengthHist",
         glue("{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
         glue("{pref}\\.gvcf_metrics\\.csv$"), "vcMetrics",
@@ -230,6 +230,18 @@ Wf_dragen <- R6::R6Class(
     read_svMetrics = function(x) {
       dat <- dragen_sv_metrics_read(x)
       tibble::tibble(name = "svmetrics", data = list(dat[]))
+    },
+    #' @description Read `cnv_metrics.csv` file.
+    #' @param x Path to file.
+    read_cnvMetrics = function(x) {
+      dat <- dragen_cnv_metrics_read(x)
+      tibble::tibble(name = "cnvmetrics", data = list(dat[]))
+    },
+    #' @description Read `fastqc_metrics.csv` file.
+    #' @param x Path to file.
+    read_fastqcMetrics = function(x) {
+      dat <- dragen_fastqc_metrics_read(x)
+      tibble::tibble(name = "fastqcmetrics", data = list(dat[]))
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 5745e87..6e6a763 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -55,6 +55,8 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_vcMetrics}{\code{Wf_dragen$read_vcMetrics()}}
 \item \href{#method-Wf_dragen-read_trimmerMetrics}{\code{Wf_dragen$read_trimmerMetrics()}}
 \item \href{#method-Wf_dragen-read_svMetrics}{\code{Wf_dragen$read_svMetrics()}}
+\item \href{#method-Wf_dragen-read_cnvMetrics}{\code{Wf_dragen$read_cnvMetrics()}}
+\item \href{#method-Wf_dragen-read_fastqcMetrics}{\code{Wf_dragen$read_fastqcMetrics()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -287,6 +289,40 @@ Read \code{sv_metrics.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_svMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_cnvMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_cnvMetrics}{}}}
+\subsection{Method \code{read_cnvMetrics()}}{
+Read \code{cnv_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_cnvMetrics(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_fastqcMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_fastqcMetrics}{}}}
+\subsection{Method \code{read_fastqcMetrics()}}{
+Read \code{fastqc_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_fastqcMetrics(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/dragen_cnv_metrics_read.Rd b/man/dragen_cnv_metrics_read.Rd
new file mode 100644
index 0000000..c6e7281
--- /dev/null
+++ b/man/dragen_cnv_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_cnv_metrics_read}
+\alias{dragen_cnv_metrics_read}
+\title{Read DRAGEN CNV Metrics}
+\usage{
+dragen_cnv_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{cnv_metrics.csv} file output from DRAGEN.
+}
diff --git a/man/dragen_fastqc_metrics_read.Rd b/man/dragen_fastqc_metrics_read.Rd
new file mode 100644
index 0000000..018a75d
--- /dev/null
+++ b/man/dragen_fastqc_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen_fastqc.R
+\name{dragen_fastqc_metrics_read}
+\alias{dragen_fastqc_metrics_read}
+\title{DRAGEN FASTQC Metrics}
+\usage{
+dragen_fastqc_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Read DRAGEN \code{fastqc_metrics.csv} file.
+}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index 52fdda4..73866fd 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L1193", {
+test_that("Function time_metrics_process() @ L1244", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From c69d26fd6717a2927fc15396841591dd39e6d3d0 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Thu, 17 Oct 2024 17:23:39 +1100
Subject: [PATCH 10/17] dragen: parse gc metrics

---
 NAMESPACE                                     |  1 +
 R/dragen.R                                    | 70 ++++++++++++++++++-
 R/tso_dragen.R                                |  8 ++-
 man/Wf_dragen.Rd                              | 18 +++++
 man/dragen_gc_metrics_read.Rd                 | 17 +++++
 .../test-roxytest-testexamples-dragen.R       |  2 +-
 6 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 man/dragen_gc_metrics_read.Rd

diff --git a/NAMESPACE b/NAMESPACE
index a496196..4a1f49b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -39,6 +39,7 @@ export(dr_s3_download)
 export(dragen_cnv_metrics_read)
 export(dragen_coverage_metrics_read)
 export(dragen_fastqc_metrics_read)
+export(dragen_gc_metrics_read)
 export(dragen_mapping_metrics_read)
 export(dragen_sv_metrics_read)
 export(dragen_trimmer_metrics_read)
diff --git a/R/dragen.R b/R/dragen.R
index bde01f9..fbebdc6 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1,3 +1,72 @@
+#' Read DRAGEN GC Metrics
+#'
+#' Reads the `gc_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_gc_metrics_read <- function(x) {
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("GC BIAS DETAILS", d0[1]))
+  d1 <- d0 |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim(
+      "value",
+      names = c("category", "dummy1", "name", "value", "fraction"),
+      delim = ",", too_few = "align_start"
+    )
+  abbrev_nm <- c(
+    "Window size" = "window_size",
+    "Number of valid windows" = "n_windows_valid",
+    "Number of discarded windows" = "n_windows_discard",
+    "Average reference GC" = "gc_avg_reference",
+    "Mean global coverage" = "cov_avg_global",
+    "Normalized coverage at GCs 0-19" = "cov_norm_0to19",
+    "Normalized coverage at GCs 20-39" = "cov_norm_20to39",
+    "Normalized coverage at GCs 40-59" = "cov_norm_40to59",
+    "Normalized coverage at GCs 60-79" = "cov_norm_60to79",
+    "Normalized coverage at GCs 80-100" = "cov_norm_80to100",
+    "AT Dropout" = "dropout_at",
+    "GC Dropout" = "dropout_gc"
+  )
+  # GC METRICS SUMMARY
+  summary <- d1 |>
+    dplyr::filter(.data$category == "GC METRICS SUMMARY") |>
+    dplyr::select("name", "value") |>
+    dplyr::mutate(
+      value = as.numeric(.data$value),
+      name = dplyr::recode(.data$name, !!!abbrev_nm)
+    ) |>
+    tidyr::pivot_wider(names_from = "name", values_from = "value")
+
+  # GC BIAS DETAILS
+  details <- d1 |>
+    dplyr::filter(.data$category == "GC BIAS DETAILS") |>
+    dplyr::mutate(
+      gc = sub(".* at GC (.*)", "\\1", .data$name),
+      name = sub("(.*) at GC .*", "\\1", .data$name),
+      name = tolower(.data$name),
+      name = sub(" ", "", .data$name),
+      value = as.numeric(.data$value),
+      fraction = as.numeric(.data$fraction)
+    )
+  details_wind <- details |>
+    dplyr::filter(.data$name == "windows") |>
+    dplyr::select("gc", "value")
+
+  details_cov <- details |>
+    dplyr::filter(.data$name == "normalizedcoverage") |>
+    dplyr::select("gc", "value")
+
+  list(
+    gcmetrics_summary = summary,
+    gcmetrics_windows = details_wind,
+    gcmetrics_coverage = details_cov
+  ) |>
+    tibble::enframe(name = "name", value = "data")
+}
+
 #' Read DRAGEN CNV Metrics
 #'
 #' Reads the `cnv_metrics.csv` file output from DRAGEN.
@@ -28,7 +97,6 @@ dragen_cnv_metrics_read <- function(x) {
     "Number of passing amplifications" = "n_amp_pass",
     "Number of passing deletions" = "n_del_pass"
   )
-
   d1 <- d0 |>
     tibble::as_tibble_col(column_name = "value") |>
     tidyr::separate_wider_delim(
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index 7030263..03d8845 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -61,7 +61,7 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.wgs_hist\\.csv$"), "hist",
         glue("{pref}\\.fastqc_metrics\\.csv$"), "fastqcMetrics",
         glue("{pref}\\.fragment_length_hist\\.csv$"), "fragmentLengthHist",
-        glue("{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.gc_metrics\\.csv$"), "gcMetrics",
         glue("{pref}\\.gvcf_metrics\\.csv$"), "vcMetrics",
         glue("{pref}\\.mapping_metrics\\.csv$"), "mappingMetrics",
         glue("{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
@@ -242,6 +242,12 @@ Wf_dragen <- R6::R6Class(
     read_fastqcMetrics = function(x) {
       dat <- dragen_fastqc_metrics_read(x)
       tibble::tibble(name = "fastqcmetrics", data = list(dat[]))
+    },
+    #' @description Read `gc_metrics.csv` file.
+    #' @param x Path to file.
+    read_gcMetrics = function(x) {
+      dat <- dragen_gc_metrics_read(x)
+      dat
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 6e6a763..04b3d0e 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -57,6 +57,7 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_svMetrics}{\code{Wf_dragen$read_svMetrics()}}
 \item \href{#method-Wf_dragen-read_cnvMetrics}{\code{Wf_dragen$read_cnvMetrics()}}
 \item \href{#method-Wf_dragen-read_fastqcMetrics}{\code{Wf_dragen$read_fastqcMetrics()}}
+\item \href{#method-Wf_dragen-read_gcMetrics}{\code{Wf_dragen$read_gcMetrics()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -323,6 +324,23 @@ Read \code{fastqc_metrics.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_fastqcMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_gcMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_gcMetrics}{}}}
+\subsection{Method \code{read_gcMetrics()}}{
+Read \code{gc_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_gcMetrics(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/dragen_gc_metrics_read.Rd b/man/dragen_gc_metrics_read.Rd
new file mode 100644
index 0000000..fc53959
--- /dev/null
+++ b/man/dragen_gc_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_gc_metrics_read}
+\alias{dragen_gc_metrics_read}
+\title{Read DRAGEN GC Metrics}
+\usage{
+dragen_gc_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{gc_metrics.csv} file output from DRAGEN.
+}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index 73866fd..6d25116 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L1244", {
+test_that("Function time_metrics_process() @ L1312", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From 32d1138830e812e67790e028b270ad811e6821b8 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 00:26:14 +1100
Subject: [PATCH 11/17] dragen: parse umi metrics

---
 NAMESPACE                                     |   1 +
 R/dragen.R                                    | 655 ++++++++++--------
 R/tso_dragen.R                                |   8 +-
 man/Wf_dragen.Rd                              |  18 +
 man/dragen_umi_metrics_read.Rd                |  17 +
 .../test-roxytest-testexamples-dragen.R       |   2 +-
 6 files changed, 415 insertions(+), 286 deletions(-)
 create mode 100644 man/dragen_umi_metrics_read.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4a1f49b..8350259 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -43,6 +43,7 @@ export(dragen_gc_metrics_read)
 export(dragen_mapping_metrics_read)
 export(dragen_sv_metrics_read)
 export(dragen_trimmer_metrics_read)
+export(dragen_umi_metrics_read)
 export(dragen_vc_metrics_read)
 export(dtw_Wf_tso_ctdna_tumor_only)
 export(dtw_Wf_tso_ctdna_tumor_only_v2)
diff --git a/R/dragen.R b/R/dragen.R
index fbebdc6..d3587b6 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1,3 +1,90 @@
+#' Read DRAGEN UMI Metrics
+#'
+#' Reads the `umi_metrics.csv` file output from DRAGEN.
+#'
+#' @param x Path to file.
+#'
+#' @return Tibble with metrics.
+#' @export
+dragen_umi_metrics_read <- function(x) {
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("UMI STATISTICS", d0[1]))
+  abbrev_nm <- tibble::tribble(
+    ~raw, ~clean, ~target,
+    "number of reads", "reads_tot", TRUE,
+    "number of reads with valid or correctable umis", "reads_umi_valid_correctable", TRUE,
+    "number of reads in discarded families", "reads_discarded_families", TRUE,
+    "reads filtered out", "reads_filtered_out", FALSE,
+    "reads with all-g umis filtered out", "reads_filtered_out_all_g_umis", FALSE,
+    "reads with uncorrectable umis", "reads_uncorrectable_umis", FALSE,
+    "total number of families", "families_tot", FALSE,
+    "families contextually corrected", "families_contextually_corrected", FALSE,
+    "families shifted", "families_shifted", FALSE,
+    "families discarded", "families_discarded_tot", TRUE,
+    "families discarded by min-support-reads", "families_discarded_minsupportreads", TRUE,
+    "families discarded by duplex/simplex", "families_discarded_duplexsimplex", TRUE,
+    "families with ambiguous correction", "families_ambiguous_correction", TRUE,
+    "duplex families", "duplex_families", TRUE,
+    "consensus pairs emitted", "consensus_pairs_emitted", FALSE,
+    "mean family depth", "avg_family_depth", TRUE,
+    "number of collapsible regions", "collapsible_regions_tot", FALSE,
+    "min collapsible region size (num reads)", "collapsible_region_size_min", FALSE,
+    "max collapsible region size (num reads)", "collapsible_region_size_max", FALSE,
+    "mean collapsible region size (num reads)", "collapsible_region_size_mean", FALSE,
+    "collapsible region size standard deviation", "collapsible_region_size_sd", FALSE,
+    "histogram of num supporting fragments", "histo_num_supporting_fragments", TRUE,
+    "histogram of unique umis per fragment position", "histo_unique_umis", FALSE
+  )
+  abbrev_nm_target <- abbrev_nm |>
+    dplyr::filter(.data$target) |>
+    dplyr::mutate(
+      raw = as.character(glue("on target {.data$raw}")),
+      clean = as.character(glue("{.data$clean}_ontarget"))
+    )
+  abbrev_nm <- abbrev_nm |>
+    dplyr::bind_rows(abbrev_nm_target) |>
+    dplyr::select("raw", "clean") |>
+    tibble::deframe()
+  d1 <- d0 |>
+    tibble::as_tibble_col(column_name = "value") |>
+    tidyr::separate_wider_delim(
+      "value",
+      names = c("category", "dummy1", "var", "count", "pct"),
+      delim = ",", too_few = "align_start"
+    ) |>
+    dplyr::mutate(
+      var = tolower(.data$var),
+      var = dplyr::recode(.data$var, !!!abbrev_nm)
+    )
+  hist <- d1 |>
+    dplyr::filter(grepl("histo", .data$var)) |>
+    dplyr::select(name = "var", "count") |>
+    dplyr::mutate(
+      count = gsub("\\{|\\}", "", .data$count),
+      count = strsplit(.data$count, "\\|")
+    ) |>
+    tidyr::unnest(count) |>
+    dplyr::mutate(count = as.numeric(.data$count)) |>
+    tidyr::nest(.by = "name")
+  d2 <- d1 |>
+    dplyr::filter(!grepl("histo", .data$var)) |>
+    dplyr::select("var", "count", "pct") |>
+    tidyr::pivot_longer(c("count", "pct"), names_to = "name", values_to = "value") |>
+    dplyr::mutate(
+      value = as.numeric(.data$value),
+      name = dplyr::if_else(.data$name == "count", "", "_pct"),
+      var = glue("{.data$var}{.data$name}")
+    ) |>
+    dplyr::select("var", "value") |>
+    dplyr::filter(!is.na(.data$value)) |>
+    tidyr::pivot_wider(names_from = "var", values_from = "value")
+  res <- list(metrics = d2) |>
+    tibble::enframe(name = "name", value = "data") |>
+    dplyr::bind_rows(hist) |>
+    dplyr::mutate(name = glue("umi_{.data$name}"))
+  res
+}
+
 #' Read DRAGEN GC Metrics
 #'
 #' Reads the `gc_metrics.csv` file output from DRAGEN.
@@ -79,8 +166,8 @@ dragen_cnv_metrics_read <- function(x) {
   d0 <- readr::read_lines(x)
   assertthat::assert_that(grepl("SEX GENOTYPER", d0[1]))
   abbrev_nm <- c(
-    "Bases in reference genome" = "bases_in_ref_genome_dragen",
-    "Average alignment coverage over genome" = "cov_alignment_avg_over_genome_dragen",
+    "Bases in reference genome" = "bases_in_ref_genome",
+    "Average alignment coverage over genome" = "cov_alignment_avg_over_genome",
     "Number of alignment records" = "n_alignment_records",
     "Number of filtered records (total)" = "n_filtered_records_tot",
     "Number of filtered records (duplicates)" = "n_filtered_records_dup",
@@ -173,28 +260,28 @@ dragen_trimmer_metrics_read <- function(x) {
   d <- readr::read_lines(x)
   assertthat::assert_that(grepl("TRIMMER STATISTICS", d[1]))
   abbrev_nm <- c(
-    "Total input reads"                              = "reads_tot_input_dragen",
-    "Total input bases"                              = "bases_tot_dragen",
-    "Total input bases R1"                           = "bases_r1_dragen",
-    "Total input bases R2"                           = "bases_r2_dragen",
-    "Average input read length"                      = "read_len_avg_dragen",
-    "Total trimmed reads"                            = "reads_trimmed_tot_dragen",
-    "Total trimmed bases"                            = "bases_trimmed_tot_dragen",
-    "Average bases trimmed per read"                 = "bases_trimmed_avg_per_read_dragen",
-    "Average bases trimmed per trimmed read"         = "bases_trimmed_avg_per_trimmedread_dragen",
-    "Remaining poly-G K-mers R1 3prime"              = "polygkmers3r1_remaining_dragen",
-    "Remaining poly-G K-mers R2 3prime"              = "polygkmers3r2_remaining_dragen",
-    "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1_dragen",
-    "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2_dragen",
-    "Poly-G soft trimmed reads filtered R1 3prime"   = "polyg_soft_trimmed_reads_filt_3r1_dragen",
-    "Poly-G soft trimmed reads filtered R2 3prime"   = "polyg_soft_trimmed_reads_filt_3r2_dragen",
-    "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1_dragen",
-    "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2_dragen",
-    "Poly-G soft trimmed bases filtered R1 3prime"   = "polyg_soft_trimmed_bases_filt_3r1_dragen",
-    "Poly-G soft trimmed bases filtered R2 3prime"   = "polyg_soft_trimmed_bases_filt_3r2_dragen",
-    "Total filtered reads"                           = "reads_tot_filt_dragen",
-    "Reads filtered for minimum read length R1"      = "reads_filt_minreadlenr1_dragen",
-    "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2_dragen"
+    "Total input reads"                              = "reads_tot_input",
+    "Total input bases"                              = "bases_tot",
+    "Total input bases R1"                           = "bases_r1",
+    "Total input bases R2"                           = "bases_r2",
+    "Average input read length"                      = "read_len_avg",
+    "Total trimmed reads"                            = "reads_trimmed_tot",
+    "Total trimmed bases"                            = "bases_trimmed_tot",
+    "Average bases trimmed per read"                 = "bases_trimmed_avg_per_read",
+    "Average bases trimmed per trimmed read"         = "bases_trimmed_avg_per_trimmedread",
+    "Remaining poly-G K-mers R1 3prime"              = "polygkmers3r1_remaining",
+    "Remaining poly-G K-mers R2 3prime"              = "polygkmers3r2_remaining",
+    "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1",
+    "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2",
+    "Poly-G soft trimmed reads filtered R1 3prime"   = "polyg_soft_trimmed_reads_filt_3r1",
+    "Poly-G soft trimmed reads filtered R2 3prime"   = "polyg_soft_trimmed_reads_filt_3r2",
+    "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1",
+    "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2",
+    "Poly-G soft trimmed bases filtered R1 3prime"   = "polyg_soft_trimmed_bases_filt_3r1",
+    "Poly-G soft trimmed bases filtered R2 3prime"   = "polyg_soft_trimmed_bases_filt_3r2",
+    "Total filtered reads"                           = "reads_tot_filt",
+    "Reads filtered for minimum read length R1"      = "reads_filt_minreadlenr1",
+    "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2"
   )
 
   d |>
@@ -227,31 +314,31 @@ dragen_trimmer_metrics_read <- function(x) {
 dragen_vc_metrics_read <- function(x) {
   abbrev_nm1 <- tibble::tribble(
     ~raw, ~clean, ~region,
-    "Total", "var_tot_dragen", FALSE,
-    "Biallelic", "var_biallelic_dragen", FALSE,
-    "Multiallelic", "var_multiallelic_dragen", FALSE,
-    "SNPs", "var_snp_dragen", FALSE,
-    "Insertions (Hom)", "var_ins_hom_dragen", FALSE,
-    "Insertions (Het)", "var_ins_het_dragen", FALSE,
-    "Deletions (Hom)", "var_del_hom_dragen", FALSE,
-    "Deletions (Het)", "var_del_het_dragen", FALSE,
-    "Indels (Het)", "var_indel_het_dragen", FALSE,
+    "Total", "var_tot", FALSE,
+    "Biallelic", "var_biallelic", FALSE,
+    "Multiallelic", "var_multiallelic", FALSE,
+    "SNPs", "var_snp", FALSE,
+    "Insertions (Hom)", "var_ins_hom", FALSE,
+    "Insertions (Het)", "var_ins_het", FALSE,
+    "Deletions (Hom)", "var_del_hom", FALSE,
+    "Deletions (Het)", "var_del_het", FALSE,
+    "Indels (Het)", "var_indel_het", FALSE,
     "Chr X number of SNPs over ", "var_snp_x_over_", TRUE,
     "Chr Y number of SNPs over ", "var_snp_y_over_", TRUE,
     "(Chr X SNPs)/(chr Y SNPs) ratio over ", "var_x_over_y_snp_ratio_over_", TRUE,
-    "SNP Transitions", "var_snp_transitions_dragen", FALSE,
-    "SNP Transversions", "var_snp_transversions_dragen", FALSE,
-    "Ti/Tv ratio", "var_ti_tv_ratio_dragen", FALSE,
-    "Heterozygous", "var_heterozygous_dragen", FALSE,
-    "Homozygous", "var_homozygous_dragen", FALSE,
-    "Het/Hom ratio", "var_het_hom_ratio_dragen", FALSE,
-    "In dbSNP", "var_in_dbsnp_dragen", FALSE,
-    "Not in dbSNP", "var_nin_dbsnp_dragen", FALSE,
-    "Percent Callability", "callability_pct_dragen", FALSE,
-    "Percent Autosome Callability", "callability_auto_pct_dragen", FALSE,
-    "Number of samples", "sample_num_dragen", FALSE,
-    "Reads Processed", "reads_processed_dragen", FALSE,
-    "Child Sample", "sample_child_dragen", FALSE
+    "SNP Transitions", "var_snp_transitions", FALSE,
+    "SNP Transversions", "var_snp_transversions", FALSE,
+    "Ti/Tv ratio", "var_ti_tv_ratio", FALSE,
+    "Heterozygous", "var_heterozygous", FALSE,
+    "Homozygous", "var_homozygous", FALSE,
+    "Het/Hom ratio", "var_het_hom_ratio", FALSE,
+    "In dbSNP", "var_in_dbsnp", FALSE,
+    "Not in dbSNP", "var_nin_dbsnp", FALSE,
+    "Percent Callability", "callability_pct", FALSE,
+    "Percent Autosome Callability", "callability_auto_pct", FALSE,
+    "Number of samples", "sample_num", FALSE,
+    "Reads Processed", "reads_processed", FALSE,
+    "Child Sample", "sample_child", FALSE
   )
   raw <- readr::read_lines(x)
   assertthat::assert_that(grepl("VARIANT CALLER", raw[1]))
@@ -285,7 +372,7 @@ dragen_vc_metrics_read <- function(x) {
   abbrev_nm <- abbrev_nm1 |>
     dplyr::mutate(
       raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw),
-      clean = ifelse(.data$region, glue("{.data$clean}{reg1}_dragen"), .data$clean)
+      clean = ifelse(.data$region, glue("{.data$clean}{reg1}"), .data$clean)
     ) |>
     dplyr::select("raw", "clean") |>
     tibble::deframe()
@@ -327,78 +414,78 @@ dragen_vc_metrics_read <- function(x) {
 #' @export
 dragen_mapping_metrics_read <- function(x) {
   abbrev_nm <- c(
-    "Total input reads" = "reads_tot_input_dragen",
-    "Number of duplicate marked reads" = "reads_num_dupmarked_dragen",
-    "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed_dragen",
-    "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq_dragen",
-    "Reads with mate sequenced" = "reads_w_mate_seq_dragen",
-    "Reads without mate sequenced" = "reads_wo_mate_seq_dragen",
-    "QC-failed reads" = "reads_qcfail_dragen",
-    "Mapped reads" = "reads_mapped_dragen",
-    "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt_dragen",
-    "Mapped reads R1" = "reads_mapped_r1_dragen",
-    "Mapped reads R2" = "reads_mapped_r2_dragen",
-    "Number of unique & mapped reads (excl. duplicate marked reads)" = "reads_num_uniq_mapped_dragen",
-    "Unmapped reads" = "reads_unmapped_dragen",
-    "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt_dragen",
-    "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj_dragen",
-    "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton_dragen",
-    "Paired reads (itself & mate mapped)" = "reads_paired_dragen",
-    "Properly paired reads" = "reads_paired_proper_dragen",
-    "Not properly paired reads (discordant)" = "reads_discordant_dragen",
-    "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom_dragen",
-    "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10_dragen",
-    "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf_dragen",
-    "Reads with MAPQ [30:40)" = "reads_mapq_30_40_dragen",
-    "Reads with MAPQ [20:30)" = "reads_mapq_20_30_dragen",
-    "Reads with MAPQ [10:20)" = "reads_mapq_10_20_dragen",
-    "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10_dragen",
-    "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped_dragen",
-    "Reads with indel R1" = "reads_indel_r1_dragen",
-    "Reads with indel R2" = "reads_indel_r2_dragen",
-    "Total bases" = "bases_tot_dragen",
-    "Total bases R1" = "bases_tot_r1_dragen",
-    "Total bases R2" = "bases_tot_r2_dragen",
-    "Mapped bases" = "bases_mapped_dragen",
-    "Mapped bases R1" = "bases_mapped_r1_dragen",
-    "Mapped bases R2" = "bases_mapped_r2_dragen",
-    "Soft-clipped bases" = "bases_softclip_dragen",
-    "Soft-clipped bases R1" = "bases_softclip_r1_dragen",
-    "Soft-clipped bases R2" = "bases_softclip_r2_dragen",
-    "Hard-clipped bases" = "bases_hardclip_dragen",
-    "Hard-clipped bases R1" = "bases_hardclip_r1_dragen",
-    "Hard-clipped bases R2" = "bases_hardclip_r2_dragen",
-    "Mismatched bases R1" = "bases_mismatched_r1_dragen",
-    "Mismatched bases R2" = "bases_mismatched_r2_dragen",
-    "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels_dragen",
-    "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels_dragen",
-    "Q30 bases" = "bases_q30_dragen",
-    "Q30 bases R1" = "bases_q30_r1_dragen",
-    "Q30 bases R2" = "bases_q30_r2_dragen",
-    "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped_dragen",
-    "Total alignments" = "alignments_tot_dragen",
-    "Secondary alignments" = "alignments_secondary_dragen",
-    "Supplementary (chimeric) alignments" = "alignments_chimeric_dragen",
-    "Estimated read length" = "read_len_dragen",
-    "Bases in reference genome" = "bases_in_ref_genome_dragen",
-    "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct_dragen",
-    "Insert length: mean" = "insert_len_mean_dragen",
-    "Insert length: median" = "insert_len_median_dragen",
-    "Insert length: standard deviation" = "insert_len_std_dev_dragen",
-    "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided_dragen",
-    "Estimated sample contamination" = "contamination_est_dragen",
-    "Estimated sample contamination standard error" = "contamination_stderr_est_dragen",
-    "DRAGEN mapping rate [mil. reads/second]" = "mapping_rate_dragen_milreads_per_sec_dragen",
-    "Total reads in RG" = "reads_tot_rg_dragen",
-    "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl_dragen",
-    "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl_dragen",
-    "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl_dragen",
-    "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl_dragen",
-    "Reads mapping to multiple locations" = "reads_map_multiloc_dragen",
-    "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj_dragen",
-    "Reads with splice junction" = "reads_splicejunc_dragen",
-    "Average sequenced coverage over genome" = "cov_avg_seq_over_genome_dragen",
-    "Filtered rRNA reads" = "reads_rrna_filtered_dragen"
+    "Total input reads" = "reads_tot_input",
+    "Number of duplicate marked reads" = "reads_num_dupmarked",
+    "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed",
+    "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq",
+    "Reads with mate sequenced" = "reads_w_mate_seq",
+    "Reads without mate sequenced" = "reads_wo_mate_seq",
+    "QC-failed reads" = "reads_qcfail",
+    "Mapped reads" = "reads_mapped",
+    "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt",
+    "Mapped reads R1" = "reads_mapped_r1",
+    "Mapped reads R2" = "reads_mapped_r2",
+    "Number of unique & mapped reads (excl. duplicate marked reads)" = "reads_num_uniq_mapped",
+    "Unmapped reads" = "reads_unmapped",
+    "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt",
+    "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj",
+    "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton",
+    "Paired reads (itself & mate mapped)" = "reads_paired",
+    "Properly paired reads" = "reads_paired_proper",
+    "Not properly paired reads (discordant)" = "reads_discordant",
+    "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom",
+    "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10",
+    "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf",
+    "Reads with MAPQ [30:40)" = "reads_mapq_30_40",
+    "Reads with MAPQ [20:30)" = "reads_mapq_20_30",
+    "Reads with MAPQ [10:20)" = "reads_mapq_10_20",
+    "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10",
+    "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped",
+    "Reads with indel R1" = "reads_indel_r1",
+    "Reads with indel R2" = "reads_indel_r2",
+    "Total bases" = "bases_tot",
+    "Total bases R1" = "bases_tot_r1",
+    "Total bases R2" = "bases_tot_r2",
+    "Mapped bases" = "bases_mapped",
+    "Mapped bases R1" = "bases_mapped_r1",
+    "Mapped bases R2" = "bases_mapped_r2",
+    "Soft-clipped bases" = "bases_softclip",
+    "Soft-clipped bases R1" = "bases_softclip_r1",
+    "Soft-clipped bases R2" = "bases_softclip_r2",
+    "Hard-clipped bases" = "bases_hardclip",
+    "Hard-clipped bases R1" = "bases_hardclip_r1",
+    "Hard-clipped bases R2" = "bases_hardclip_r2",
+    "Mismatched bases R1" = "bases_mismatched_r1",
+    "Mismatched bases R2" = "bases_mismatched_r2",
+    "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels",
+    "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels",
+    "Q30 bases" = "bases_q30",
+    "Q30 bases R1" = "bases_q30_r1",
+    "Q30 bases R2" = "bases_q30_r2",
+    "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped",
+    "Total alignments" = "alignments_tot",
+    "Secondary alignments" = "alignments_secondary",
+    "Supplementary (chimeric) alignments" = "alignments_chimeric",
+    "Estimated read length" = "read_len",
+    "Bases in reference genome" = "bases_in_ref_genome",
+    "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct",
+    "Insert length: mean" = "insert_len_mean",
+    "Insert length: median" = "insert_len_median",
+    "Insert length: standard deviation" = "insert_len_std_dev",
+    "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided",
+    "Estimated sample contamination" = "contamination_est",
+    "Estimated sample contamination standard error" = "contamination_stderr_est",
+    "DRAGEN mapping rate [mil. reads/second]" = "mapping_rate_dragen_milreads_per_sec",
+    "Total reads in RG" = "reads_tot_rg",
+    "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl",
+    "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl",
+    "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl",
+    "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl",
+    "Reads mapping to multiple locations" = "reads_map_multiloc",
+    "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj",
+    "Reads with splice junction" = "reads_splicejunc",
+    "Average sequenced coverage over genome" = "cov_avg_seq_over_genome",
+    "Filtered rRNA reads" = "reads_rrna_filtered"
   )
   raw <- readr::read_lines(x)
   assertthat::assert_that(grepl("MAPPING/ALIGNING", raw[1]))
@@ -442,8 +529,8 @@ dragen_coverage_metrics_read <- function(x) {
   # all rows except 'Aligned bases' and 'Aligned reads' refer to the region
   abbrev_nm <- tibble::tribble(
     ~raw, ~clean, ~region,
-    "Aligned bases", "bases_aligned_tot_dragen", FALSE,
-    "Aligned reads", "reads_aligned_tot_dragen", FALSE,
+    "Aligned bases", "bases_aligned_tot", FALSE,
+    "Aligned reads", "reads_aligned_tot", FALSE,
     "Aligned bases in ", "bases_aligned_", TRUE,
     "Average alignment coverage over ", "cov_alignment_avg_over_", TRUE,
     "Uniformity of coverage (PCT > 0.2*mean) over ", "cov_uniformity_pct_gt02mean_", TRUE,
@@ -488,7 +575,7 @@ dragen_coverage_metrics_read <- function(x) {
   abbrev_nm <- abbrev_nm |>
     dplyr::mutate(
       raw = ifelse(.data$region, glue("{.data$raw}{str1}"), .data$raw),
-      clean = ifelse(.data$region, glue("{.data$clean}{reg1}_dragen"), .data$clean)
+      clean = ifelse(.data$region, glue("{.data$clean}{reg1}"), .data$clean)
     ) |>
     dplyr::select("raw", "clean") |>
     tibble::deframe()
@@ -509,7 +596,7 @@ dragen_coverage_metrics_read <- function(x) {
       var = gsub("inf", "Inf", .data$var)
     ) |>
     tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
-    dplyr::mutate(var = as.character(glue("cov_pct_{start}_{end}_{reg1}_dragen"))) |>
+    dplyr::mutate(var = as.character(glue("cov_pct_{start}_{end}_{reg1}"))) |>
     dplyr::select("var", "value")
   res <- dplyr::bind_rows(res1, res2) |>
     dplyr::mutate(
@@ -695,19 +782,19 @@ WgsCoverageMetricsFile <- R6::R6Class(
     #' @return tibble with one row and metrics spread across individual columns.
     read = function() {
       abbrev_nm <- c(
-        "Aligned bases"                                       = "bases_aligned_dragen",
-        "Aligned bases in genome"                             = "bases_aligned_in_genome_dragen",
-        "Average alignment coverage over genome"              = "cov_alignment_avg_over_genome_dragen",
-        "Uniformity of coverage (PCT > 0.2*mean) over genome" = "cov_uniformity_over_genome_pct_gt02mean_dragen",
-        "Uniformity of coverage (PCT > 0.4*mean) over genome" = "cov_uniformity_over_genome_pct_gt04mean_dragen",
-        "Average chr X coverage over genome"                  = "cov_avg_x_over_genome_dragen",
-        "Average chr Y coverage over genome"                  = "cov_avg_y_over_genome_dragen",
-        "Average mitochondrial coverage over genome"          = "cov_avg_mt_over_genome_dragen",
-        "Average autosomal coverage over genome"              = "cov_avg_auto_over_genome_dragen",
-        "Median autosomal coverage over genome"               = "cov_median_auto_over_genome_dragen",
-        "Mean/Median autosomal coverage ratio over genome"    = "cov_mean_median_auto_ratio_over_genome_dragen",
-        "Aligned reads"                                       = "reads_aligned_dragen",
-        "Aligned reads in genome"                             = "reads_aligned_in_genome_dragen"
+        "Aligned bases"                                       = "bases_aligned",
+        "Aligned bases in genome"                             = "bases_aligned_in_genome",
+        "Average alignment coverage over genome"              = "cov_alignment_avg_over_genome",
+        "Uniformity of coverage (PCT > 0.2*mean) over genome" = "cov_uniformity_over_genome_pct_gt02mean",
+        "Uniformity of coverage (PCT > 0.4*mean) over genome" = "cov_uniformity_over_genome_pct_gt04mean",
+        "Average chr X coverage over genome"                  = "cov_avg_x_over_genome",
+        "Average chr Y coverage over genome"                  = "cov_avg_y_over_genome",
+        "Average mitochondrial coverage over genome"          = "cov_avg_mt_over_genome",
+        "Average autosomal coverage over genome"              = "cov_avg_auto_over_genome",
+        "Median autosomal coverage over genome"               = "cov_median_auto_over_genome",
+        "Mean/Median autosomal coverage ratio over genome"    = "cov_mean_median_auto_ratio_over_genome",
+        "Aligned reads"                                       = "reads_aligned",
+        "Aligned reads in genome"                             = "reads_aligned_in_genome"
       )
 
       x <- self$path
@@ -736,7 +823,7 @@ WgsCoverageMetricsFile <- R6::R6Class(
           var = gsub("inf", "Inf", .data$var)
         ) |>
         tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
-        dplyr::mutate(var = as.character(glue("cov_genome_pct_{start}_{end}_dragen"))) |>
+        dplyr::mutate(var = as.character(glue("cov_genome_pct_{start}_{end}"))) |>
         dplyr::select("var", "value")
       res <- dplyr::bind_rows(res1, res2) |>
         dplyr::mutate(
@@ -973,77 +1060,77 @@ MappingMetricsFile <- R6::R6Class(
     #' @return tibble with one row of X metrics per read group.
     read = function() {
       abbrev_nm <- c(
-        "Total input reads" = "reads_tot_input_dragen",
-        "Number of duplicate marked reads" = "reads_num_dupmarked_dragen",
-        "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq_dragen",
-        "Reads with mate sequenced" = "reads_w_mate_seq_dragen",
-        "Reads without mate sequenced" = "reads_wo_mate_seq_dragen",
-        "QC-failed reads" = "reads_qcfail_dragen",
-        "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl_dragen",
-        "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl_dragen",
-        "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl_dragen",
-        "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl_dragen",
-        "Reads mapping to multiple locations" = "reads_map_multiloc_dragen",
-        "Hard-clipped bases R1" = "bases_hardclip_r1_dragen",
-        "Hard-clipped bases R2" = "bases_hardclip_r2_dragen",
-        "Soft-clipped bases" = "bases_softclip_dragen",
-        "Hard-clipped bases" = "bases_hardclip_dragen",
-        "Mapped reads" = "reads_mapped_dragen",
-        "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt_dragen",
-        "Mapped reads R1" = "reads_mapped_r1_dragen",
-        "Mapped reads R2" = "reads_mapped_r2_dragen",
-        "Number of unique & mapped reads (excl. duplicate marked reads)" = "reads_num_uniq_mapped_dragen",
-        "Unmapped reads" = "reads_unmapped_dragen",
-        "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt_dragen",
-        "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj_dragen",
-        "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj_dragen",
-        "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton_dragen",
-        "Paired reads (itself & mate mapped)" = "reads_paired_dragen",
-        "Properly paired reads" = "reads_paired_proper_dragen",
-        "Not properly paired reads (discordant)" = "reads_discordant_dragen",
-        "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom_dragen",
-        "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10_dragen",
-        "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf_dragen",
-        "Reads with MAPQ [30:40)" = "reads_mapq_30_40_dragen",
-        "Reads with MAPQ [20:30)" = "reads_mapq_20_30_dragen",
-        "Reads with MAPQ [10:20)" = "reads_mapq_10_20_dragen",
-        "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10_dragen",
-        "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped_dragen",
-        "Reads with indel R1" = "reads_indel_r1_dragen",
-        "Reads with indel R2" = "reads_indel_r2_dragen",
-        "Reads with splice junction" = "reads_splicejunc_dragen",
-        "Total bases" = "bases_tot_dragen",
-        "Total bases R1" = "bases_tot_r1_dragen",
-        "Total bases R2" = "bases_tot_r2_dragen",
-        "Mapped bases" = "bases_mapped_dragen",
-        "Mapped bases R1" = "bases_mapped_r1_dragen",
-        "Mapped bases R2" = "bases_mapped_r2_dragen",
-        "Soft-clipped bases R1" = "bases_softclip_r1_dragen",
-        "Soft-clipped bases R2" = "bases_softclip_r2_dragen",
-        "Mismatched bases R1" = "bases_mismatched_r1_dragen",
-        "Mismatched bases R2" = "bases_mismatched_r2_dragen",
-        "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels_dragen",
-        "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels_dragen",
-        "Q30 bases" = "bases_q30_dragen",
-        "Q30 bases R1" = "bases_q30_r1_dragen",
-        "Q30 bases R2" = "bases_q30_r2_dragen",
-        "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped_dragen",
-        "Total alignments" = "alignments_tot_dragen",
-        "Secondary alignments" = "alignments_secondary_dragen",
-        "Supplementary (chimeric) alignments" = "alignments_chimeric_dragen",
-        "Estimated read length" = "read_len_dragen",
-        "Bases in reference genome" = "bases_in_ref_genome_dragen",
-        "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct_dragen",
-        "Average sequenced coverage over genome" = "cov_avg_seq_over_genome_dragen",
-        "Insert length: mean" = "insert_len_mean_dragen",
-        "Insert length: median" = "insert_len_median_dragen",
-        "Insert length: standard deviation" = "insert_len_std_dev_dragen",
-        "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided_dragen",
-        "Estimated sample contamination" = "contamination_est_dragen",
-        "DRAGEN mapping rate [mil. reads/second]" = "mapping_rate_dragen_milreads_per_sec_dragen",
-        "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed_dragen",
-        "Total reads in RG" = "reads_tot_rg_dragen",
-        "Filtered rRNA reads" = "reads_rrna_filtered_dragen"
+        "Total input reads" = "reads_tot_input",
+        "Number of duplicate marked reads" = "reads_num_dupmarked",
+        "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq",
+        "Reads with mate sequenced" = "reads_w_mate_seq",
+        "Reads without mate sequenced" = "reads_wo_mate_seq",
+        "QC-failed reads" = "reads_qcfail",
+        "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl",
+        "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl",
+        "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl",
+        "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl",
+        "Reads mapping to multiple locations" = "reads_map_multiloc",
+        "Hard-clipped bases R1" = "bases_hardclip_r1",
+        "Hard-clipped bases R2" = "bases_hardclip_r2",
+        "Soft-clipped bases" = "bases_softclip",
+        "Hard-clipped bases" = "bases_hardclip",
+        "Mapped reads" = "reads_mapped",
+        "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt",
+        "Mapped reads R1" = "reads_mapped_r1",
+        "Mapped reads R2" = "reads_mapped_r2",
+        "Number of unique & mapped reads (excl. duplicate marked reads)" = "reads_num_uniq_mapped",
+        "Unmapped reads" = "reads_unmapped",
+        "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt",
+        "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj",
+        "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj",
+        "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton",
+        "Paired reads (itself & mate mapped)" = "reads_paired",
+        "Properly paired reads" = "reads_paired_proper",
+        "Not properly paired reads (discordant)" = "reads_discordant",
+        "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom",
+        "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10",
+        "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf",
+        "Reads with MAPQ [30:40)" = "reads_mapq_30_40",
+        "Reads with MAPQ [20:30)" = "reads_mapq_20_30",
+        "Reads with MAPQ [10:20)" = "reads_mapq_10_20",
+        "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10",
+        "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped",
+        "Reads with indel R1" = "reads_indel_r1",
+        "Reads with indel R2" = "reads_indel_r2",
+        "Reads with splice junction" = "reads_splicejunc",
+        "Total bases" = "bases_tot",
+        "Total bases R1" = "bases_tot_r1",
+        "Total bases R2" = "bases_tot_r2",
+        "Mapped bases" = "bases_mapped",
+        "Mapped bases R1" = "bases_mapped_r1",
+        "Mapped bases R2" = "bases_mapped_r2",
+        "Soft-clipped bases R1" = "bases_softclip_r1",
+        "Soft-clipped bases R2" = "bases_softclip_r2",
+        "Mismatched bases R1" = "bases_mismatched_r1",
+        "Mismatched bases R2" = "bases_mismatched_r2",
+        "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels",
+        "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels",
+        "Q30 bases" = "bases_q30",
+        "Q30 bases R1" = "bases_q30_r1",
+        "Q30 bases R2" = "bases_q30_r2",
+        "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped",
+        "Total alignments" = "alignments_tot",
+        "Secondary alignments" = "alignments_secondary",
+        "Supplementary (chimeric) alignments" = "alignments_chimeric",
+        "Estimated read length" = "read_len",
+        "Bases in reference genome" = "bases_in_ref_genome",
+        "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct",
+        "Average sequenced coverage over genome" = "cov_avg_seq_over_genome",
+        "Insert length: mean" = "insert_len_mean",
+        "Insert length: median" = "insert_len_median",
+        "Insert length: standard deviation" = "insert_len_std_dev",
+        "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided",
+        "Estimated sample contamination" = "contamination_est",
+        "DRAGEN mapping rate [mil. reads/second]" = "mapping_rate_dragen_milreads_per_sec",
+        "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed",
+        "Total reads in RG" = "reads_tot_rg",
+        "Filtered rRNA reads" = "reads_rrna_filtered"
       )
       x <- self$path
       raw <- readr::read_lines(x)
@@ -1119,34 +1206,34 @@ PloidyEstimationMetricsFile <- R6::R6Class(
       raw <- readr::read_lines(x)
       assertthat::assert_that(grepl("PLOIDY ESTIMATION", raw[1]))
       abbrev_nm <- c(
-        "Autosomal median coverage" = "cov_auto_median_dragen",
-        "X median coverage" = "cov_x_median_dragen",
-        "Y median coverage" = "cov_y_median_dragen",
-        "1 median / Autosomal median" = "cov_1_div_auto_median_dragen",
-        "2 median / Autosomal median" = "cov_2_div_auto_median_dragen",
-        "3 median / Autosomal median" = "cov_3_div_auto_median_dragen",
-        "4 median / Autosomal median" = "cov_4_div_auto_median_dragen",
-        "5 median / Autosomal median" = "cov_5_div_auto_median_dragen",
-        "6 median / Autosomal median" = "cov_6_div_auto_median_dragen",
-        "7 median / Autosomal median" = "cov_7_div_auto_median_dragen",
-        "8 median / Autosomal median" = "cov_8_div_auto_median_dragen",
-        "9 median / Autosomal median" = "cov_9_div_auto_median_dragen",
-        "10 median / Autosomal median" = "cov_10_div_auto_median_dragen",
-        "11 median / Autosomal median" = "cov_11_div_auto_median_dragen",
-        "12 median / Autosomal median" = "cov_12_div_auto_median_dragen",
-        "13 median / Autosomal median" = "cov_13_div_auto_median_dragen",
-        "14 median / Autosomal median" = "cov_14_div_auto_median_dragen",
-        "15 median / Autosomal median" = "cov_15_div_auto_median_dragen",
-        "16 median / Autosomal median" = "cov_16_div_auto_median_dragen",
-        "17 median / Autosomal median" = "cov_17_div_auto_median_dragen",
-        "18 median / Autosomal median" = "cov_18_div_auto_median_dragen",
-        "19 median / Autosomal median" = "cov_19_div_auto_median_dragen",
-        "20 median / Autosomal median" = "cov_20_div_auto_median_dragen",
-        "21 median / Autosomal median" = "cov_21_div_auto_median_dragen",
-        "22 median / Autosomal median" = "cov_22_div_auto_median_dragen",
-        "X median / Autosomal median" = "cov_x_div_auto_median_dragen",
-        "Y median / Autosomal median" = "cov_y_div_auto_median_dragen",
-        "Ploidy estimation" = "ploidy_est_dragen"
+        "Autosomal median coverage" = "cov_auto_median",
+        "X median coverage" = "cov_x_median",
+        "Y median coverage" = "cov_y_median",
+        "1 median / Autosomal median" = "cov_1_div_auto_median",
+        "2 median / Autosomal median" = "cov_2_div_auto_median",
+        "3 median / Autosomal median" = "cov_3_div_auto_median",
+        "4 median / Autosomal median" = "cov_4_div_auto_median",
+        "5 median / Autosomal median" = "cov_5_div_auto_median",
+        "6 median / Autosomal median" = "cov_6_div_auto_median",
+        "7 median / Autosomal median" = "cov_7_div_auto_median",
+        "8 median / Autosomal median" = "cov_8_div_auto_median",
+        "9 median / Autosomal median" = "cov_9_div_auto_median",
+        "10 median / Autosomal median" = "cov_10_div_auto_median",
+        "11 median / Autosomal median" = "cov_11_div_auto_median",
+        "12 median / Autosomal median" = "cov_12_div_auto_median",
+        "13 median / Autosomal median" = "cov_13_div_auto_median",
+        "14 median / Autosomal median" = "cov_14_div_auto_median",
+        "15 median / Autosomal median" = "cov_15_div_auto_median",
+        "16 median / Autosomal median" = "cov_16_div_auto_median",
+        "17 median / Autosomal median" = "cov_17_div_auto_median",
+        "18 median / Autosomal median" = "cov_18_div_auto_median",
+        "19 median / Autosomal median" = "cov_19_div_auto_median",
+        "20 median / Autosomal median" = "cov_20_div_auto_median",
+        "21 median / Autosomal median" = "cov_21_div_auto_median",
+        "22 median / Autosomal median" = "cov_22_div_auto_median",
+        "X median / Autosomal median" = "cov_x_div_auto_median",
+        "Y median / Autosomal median" = "cov_y_div_auto_median",
+        "Ploidy estimation" = "ploidy_est"
       )
 
       d <- raw |>
@@ -1158,7 +1245,7 @@ PloidyEstimationMetricsFile <- R6::R6Class(
         ) |>
         tidyr::pivot_wider(names_from = "var", values_from = "value")
       # now convert all except 'Ploidy estimation' to numeric
-      cols1 <- colnames(d)[colnames(d) != "ploidy_est_dragen"]
+      cols1 <- colnames(d)[colnames(d) != "ploidy_est"]
       d |>
         dplyr::mutate(dplyr::across(dplyr::all_of(cols1), as.numeric))
     },
@@ -1339,31 +1426,31 @@ VCMetricsFile <- R6::R6Class(
     #' @return tibble with one row and metrics spread across individual columns.
     read = function() {
       abbrev_nm <- c(
-        "Total" = "var_tot_dragen",
-        "Biallelic" = "var_biallelic_dragen",
-        "Multiallelic" = "var_multiallelic_dragen",
-        "SNPs" = "var_snp_dragen",
-        "Insertions (Hom)" = "var_ins_hom_dragen",
-        "Insertions (Het)" = "var_ins_het_dragen",
-        "Deletions (Hom)" = "var_del_hom_dragen",
-        "Deletions (Het)" = "var_del_het_dragen",
-        "Indels (Het)" = "var_indel_het_dragen",
-        "Chr X number of SNPs over genome" = "var_snp_x_over_genome_dragen",
-        "Chr Y number of SNPs over genome" = "var_snp_y_over_genome_dragen",
-        "(Chr X SNPs)/(chr Y SNPs) ratio over genome" = "var_x_over_y_snp_ratio_over_genome_dragen",
-        "SNP Transitions" = "var_snp_transitions_dragen",
-        "SNP Transversions" = "var_snp_transversions_dragen",
-        "Ti/Tv ratio" = "var_ti_tv_ratio_dragen",
-        "Heterozygous" = "var_heterozygous_dragen",
-        "Homozygous" = "var_homozygous_dragen",
-        "Het/Hom ratio" = "var_het_hom_ratio_dragen",
-        "In dbSNP" = "var_in_dbsnp_dragen",
-        "Not in dbSNP" = "var_nin_dbsnp_dragen",
-        "Percent Callability" = "callability_pct_dragen",
-        "Percent Autosome Callability" = "callability_auto_pct_dragen",
-        "Number of samples" = "sample_num_dragen",
-        "Reads Processed" = "reads_processed_dragen",
-        "Child Sample" = "sample_child_dragen"
+        "Total" = "var_tot",
+        "Biallelic" = "var_biallelic",
+        "Multiallelic" = "var_multiallelic",
+        "SNPs" = "var_snp",
+        "Insertions (Hom)" = "var_ins_hom",
+        "Insertions (Het)" = "var_ins_het",
+        "Deletions (Hom)" = "var_del_hom",
+        "Deletions (Het)" = "var_del_het",
+        "Indels (Het)" = "var_indel_het",
+        "Chr X number of SNPs over genome" = "var_snp_x_over_genome",
+        "Chr Y number of SNPs over genome" = "var_snp_y_over_genome",
+        "(Chr X SNPs)/(chr Y SNPs) ratio over genome" = "var_x_over_y_snp_ratio_over_genome",
+        "SNP Transitions" = "var_snp_transitions",
+        "SNP Transversions" = "var_snp_transversions",
+        "Ti/Tv ratio" = "var_ti_tv_ratio",
+        "Heterozygous" = "var_heterozygous",
+        "Homozygous" = "var_homozygous",
+        "Het/Hom ratio" = "var_het_hom_ratio",
+        "In dbSNP" = "var_in_dbsnp",
+        "Not in dbSNP" = "var_nin_dbsnp",
+        "Percent Callability" = "callability_pct",
+        "Percent Autosome Callability" = "callability_auto_pct",
+        "Number of samples" = "sample_num",
+        "Reads Processed" = "reads_processed",
+        "Child Sample" = "sample_child"
       )
       x <- self$path
       raw <- readr::read_lines(x)
@@ -1445,28 +1532,28 @@ TrimmerMetricsFile <- R6::R6Class(
       d <- readr::read_lines(x)
       assertthat::assert_that(grepl("TRIMMER STATISTICS", d[1]))
       abbrev_nm <- c(
-        "Total input reads"                              = "reads_tot_input_dragen",
-        "Total input bases"                              = "bases_tot_dragen",
-        "Total input bases R1"                           = "bases_r1_dragen",
-        "Total input bases R2"                           = "bases_r2_dragen",
-        "Average input read length"                      = "read_len_avg_dragen",
-        "Total trimmed reads"                            = "reads_trimmed_tot_dragen",
-        "Total trimmed bases"                            = "bases_trimmed_tot_dragen",
-        "Average bases trimmed per read"                 = "bases_trimmed_avg_per_read_dragen",
-        "Average bases trimmed per trimmed read"         = "bases_trimmed_avg_per_trimmedread_dragen",
-        "Remaining poly-G K-mers R1 3prime"              = "polygkmers3r1_remaining_dragen",
-        "Remaining poly-G K-mers R2 3prime"              = "polygkmers3r2_remaining_dragen",
-        "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1_dragen",
-        "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2_dragen",
-        "Poly-G soft trimmed reads filtered R1 3prime"   = "polyg_soft_trimmed_reads_filt_3r1_dragen",
-        "Poly-G soft trimmed reads filtered R2 3prime"   = "polyg_soft_trimmed_reads_filt_3r2_dragen",
-        "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1_dragen",
-        "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2_dragen",
-        "Poly-G soft trimmed bases filtered R1 3prime"   = "polyg_soft_trimmed_bases_filt_3r1_dragen",
-        "Poly-G soft trimmed bases filtered R2 3prime"   = "polyg_soft_trimmed_bases_filt_3r2_dragen",
-        "Total filtered reads"                           = "reads_tot_filt_dragen",
-        "Reads filtered for minimum read length R1"      = "reads_filt_minreadlenr1_dragen",
-        "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2_dragen"
+        "Total input reads"                              = "reads_tot_input",
+        "Total input bases"                              = "bases_tot",
+        "Total input bases R1"                           = "bases_r1",
+        "Total input bases R2"                           = "bases_r2",
+        "Average input read length"                      = "read_len_avg",
+        "Total trimmed reads"                            = "reads_trimmed_tot",
+        "Total trimmed bases"                            = "bases_trimmed_tot",
+        "Average bases trimmed per read"                 = "bases_trimmed_avg_per_read",
+        "Average bases trimmed per trimmed read"         = "bases_trimmed_avg_per_trimmedread",
+        "Remaining poly-G K-mers R1 3prime"              = "polygkmers3r1_remaining",
+        "Remaining poly-G K-mers R2 3prime"              = "polygkmers3r2_remaining",
+        "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1",
+        "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2",
+        "Poly-G soft trimmed reads filtered R1 3prime"   = "polyg_soft_trimmed_reads_filt_3r1",
+        "Poly-G soft trimmed reads filtered R2 3prime"   = "polyg_soft_trimmed_reads_filt_3r2",
+        "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1",
+        "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2",
+        "Poly-G soft trimmed bases filtered R1 3prime"   = "polyg_soft_trimmed_bases_filt_3r1",
+        "Poly-G soft trimmed bases filtered R2 3prime"   = "polyg_soft_trimmed_bases_filt_3r2",
+        "Total filtered reads"                           = "reads_tot_filt",
+        "Reads filtered for minimum read length R1"      = "reads_filt_minreadlenr1",
+        "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2"
       )
 
       d |>
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index 03d8845..a7aa883 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -69,7 +69,7 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.sv_metrics\\.csv$"), "svMetrics",
         glue("{pref}\\.time_metrics\\.csv$"), "timeMetrics",
         glue("{pref}\\.trimmer_metrics\\.csv$"), "trimmerMetrics",
-        glue("{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.umi_metrics\\.csv$"), "umiMetrics",
         glue("{pref}\\.vc_metrics\\.csv$"), "vcMetrics"
       )
       regexes <- reg1 |>
@@ -248,6 +248,12 @@ Wf_dragen <- R6::R6Class(
     read_gcMetrics = function(x) {
       dat <- dragen_gc_metrics_read(x)
       dat
+    },
+    #' @description Read `umi_metrics.csv` file.
+    #' @param x Path to file.
+    read_umiMetrics = function(x) {
+      dat <- dragen_umi_metrics_read(x)
+      dat
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 04b3d0e..2387a47 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -58,6 +58,7 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_cnvMetrics}{\code{Wf_dragen$read_cnvMetrics()}}
 \item \href{#method-Wf_dragen-read_fastqcMetrics}{\code{Wf_dragen$read_fastqcMetrics()}}
 \item \href{#method-Wf_dragen-read_gcMetrics}{\code{Wf_dragen$read_gcMetrics()}}
+\item \href{#method-Wf_dragen-read_umiMetrics}{\code{Wf_dragen$read_umiMetrics()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -341,6 +342,23 @@ Read \code{gc_metrics.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_gcMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_umiMetrics"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_umiMetrics}{}}}
+\subsection{Method \code{read_umiMetrics()}}{
+Read \code{umi_metrics.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_umiMetrics(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/man/dragen_umi_metrics_read.Rd b/man/dragen_umi_metrics_read.Rd
new file mode 100644
index 0000000..ca80a82
--- /dev/null
+++ b/man/dragen_umi_metrics_read.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_umi_metrics_read}
+\alias{dragen_umi_metrics_read}
+\title{Read DRAGEN UMI Metrics}
+\usage{
+dragen_umi_metrics_read(x)
+}
+\arguments{
+\item{x}{Path to file.}
+}
+\value{
+Tibble with metrics.
+}
+\description{
+Reads the \code{umi_metrics.csv} file output from DRAGEN.
+}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index 6d25116..bbadc08 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L1312", {
+test_that("Function time_metrics_process() @ L1398", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From 1e4cdb43186d7d52a35e44775c046016b6ea034c Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 01:23:13 +1100
Subject: [PATCH 12/17] dragen: parse msi metrics

---
 R/dragen.R                                    |  2 +-
 R/dragen_fastqc.R                             |  7 ----
 R/tso.R                                       |  9 ++++-
 R/tso_dragen.R                                | 17 +++++++--
 man/FastqcMetricsFile.Rd                      |  8 -----
 man/Wf_dragen.Rd                              | 36 +++++++++++++++++++
 .../test-roxytest-testexamples-dragen.R       |  2 +-
 7 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/R/dragen.R b/R/dragen.R
index d3587b6..42811f9 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -63,7 +63,7 @@ dragen_umi_metrics_read <- function(x) {
       count = gsub("\\{|\\}", "", .data$count),
       count = strsplit(.data$count, "\\|")
     ) |>
-    tidyr::unnest(count) |>
+    tidyr::unnest("count") |>
     dplyr::mutate(count = as.numeric(.data$count)) |>
     tidyr::nest(.by = "name")
   d2 <- d1 |>
diff --git a/R/dragen_fastqc.R b/R/dragen_fastqc.R
index 3615cd6..4390037 100644
--- a/R/dragen_fastqc.R
+++ b/R/dragen_fastqc.R
@@ -4,13 +4,6 @@
 #' Contains methods for reading and displaying contents of
 #' the `fastqc_metrics.csv` file output from DRAGEN.
 #'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.fastqc_metrics.csv.gz", package = "dracarys")
-#' fq <- FastqcMetricsFile$new(x)
-#' d <- fq$read()
-#' fq$write(d, out_dir = tempdir(), prefix = "seqc_fq", out_format = "tsv")
-#' # fq$plot(d)
-#'
 #' @export
 FastqcMetricsFile <- R6::R6Class(
   "FastqcMetricsFile",
diff --git a/R/tso.R b/R/tso.R
index a302003..7596449 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -471,6 +471,13 @@ tso_msi_read <- function(x) {
   if (j[["PercentageUnstableSites"]] == "NaN") {
     j[["PercentageUnstableSites"]] <- NA_real_
   }
+  num_cols <- c(
+    "TotalMicrosatelliteSitesAssessed", "TotalMicrosatelliteSitesUnstable",
+    "PercentageUnstableSites", "SumDistance", "SumJsd"
+  )
   tibble::as_tibble_row(j) |>
-    dplyr::mutate(ResultIsValid = as.character(.data$ResultIsValid))
+    dplyr::mutate(
+      dplyr::across(dplyr::any_of(num_cols), as.numeric),
+      ResultIsValid = as.character(.data$ResultIsValid),
+    )
 }
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index a7aa883..d5175a3 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -64,8 +64,8 @@ Wf_dragen <- R6::R6Class(
         glue("{pref}\\.gc_metrics\\.csv$"), "gcMetrics",
         glue("{pref}\\.gvcf_metrics\\.csv$"), "vcMetrics",
         glue("{pref}\\.mapping_metrics\\.csv$"), "mappingMetrics",
-        glue("{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
-        glue("{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
+        glue("{pref}\\.microsat_diffs\\.txt$"), "msiDiffs",
+        glue("{pref}\\.microsat_output\\.json$"), "msi",
         glue("{pref}\\.sv_metrics\\.csv$"), "svMetrics",
         glue("{pref}\\.time_metrics\\.csv$"), "timeMetrics",
         glue("{pref}\\.trimmer_metrics\\.csv$"), "trimmerMetrics",
@@ -254,6 +254,19 @@ Wf_dragen <- R6::R6Class(
     read_umiMetrics = function(x) {
       dat <- dragen_umi_metrics_read(x)
       dat
+    },
+    #' @description Read `microsat_output.json` file.
+    #' @param x Path to file.
+    read_msi = function(x) {
+      dat <- tso_msi_read(x)
+      tibble::tibble(name = "msi", data = list(dat[]))
+    },
+    #' @description Read `microsat_diffs.txt` file.
+    #' @param x Path to file.
+    read_msiDiffs = function(x) {
+      dat <- readr::read_tsv(x, col_types = "cdccddc") |>
+        dplyr::rename(Chromosome = "#Chromosome")
+      tibble::tibble(name = "msidiffs", data = list(dat[]))
     }
   ) # end public
 ) # end Wf_dragen
diff --git a/man/FastqcMetricsFile.Rd b/man/FastqcMetricsFile.Rd
index ec13b16..301bce3 100644
--- a/man/FastqcMetricsFile.Rd
+++ b/man/FastqcMetricsFile.Rd
@@ -6,14 +6,6 @@
 \description{
 Contains methods for reading and displaying contents of
 the \code{fastqc_metrics.csv} file output from DRAGEN.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.fastqc_metrics.csv.gz", package = "dracarys")
-fq <- FastqcMetricsFile$new(x)
-d <- fq$read()
-fq$write(d, out_dir = tempdir(), prefix = "seqc_fq", out_format = "tsv")
-# fq$plot(d)
-
 }
 \section{Super class}{
 \code{\link[dracarys:File]{dracarys::File}} -> \code{FastqcMetricsFile}
diff --git a/man/Wf_dragen.Rd b/man/Wf_dragen.Rd
index 2387a47..141e855 100644
--- a/man/Wf_dragen.Rd
+++ b/man/Wf_dragen.Rd
@@ -59,6 +59,8 @@ d_write <- t1$write(
 \item \href{#method-Wf_dragen-read_fastqcMetrics}{\code{Wf_dragen$read_fastqcMetrics()}}
 \item \href{#method-Wf_dragen-read_gcMetrics}{\code{Wf_dragen$read_gcMetrics()}}
 \item \href{#method-Wf_dragen-read_umiMetrics}{\code{Wf_dragen$read_umiMetrics()}}
+\item \href{#method-Wf_dragen-read_msi}{\code{Wf_dragen$read_msi()}}
+\item \href{#method-Wf_dragen-read_msiDiffs}{\code{Wf_dragen$read_msiDiffs()}}
 \item \href{#method-Wf_dragen-clone}{\code{Wf_dragen$clone()}}
 }
 }
@@ -359,6 +361,40 @@ Read \code{umi_metrics.csv} file.
 \if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_umiMetrics(x)}\if{html}{\out{</div>}}
 }
 
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_msi"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_msi}{}}}
+\subsection{Method \code{read_msi()}}{
+Read \code{microsat_output.json} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_msi(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_dragen-read_msiDiffs"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_dragen-read_msiDiffs}{}}}
+\subsection{Method \code{read_msiDiffs()}}{
+Read \code{microsat_diffs.txt} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_dragen$read_msiDiffs(x)}\if{html}{\out{</div>}}
+}
+
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
index bbadc08..8db3f7e 100644
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ b/tests/testthat/test-roxytest-testexamples-dragen.R
@@ -2,7 +2,7 @@
 
 # File R/dragen.R: @testexamples
 
-test_that("Function time_metrics_process() @ L1398", {
+test_that("Function time_metrics_process() @ L1399", {
   
   p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
   x <- TimeMetricsFile$new(p)

From 1d68fd1e56ad1644ee2029cba864488e69d995f9 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 01:48:35 +1100
Subject: [PATCH 13/17] dragen: remove per-file R6 classes (man/)

---
 man/FastqcMetricsFile.Rd                      |  93 -----------
 man/FragmentLengthHistFile.Rd                 | 131 ----------------
 man/MappingMetricsFile.Rd                     | 106 -------------
 man/ReplayFile.Rd                             |  94 -----------
 man/SvMetricsFile.Rd                          |  94 -----------
 man/TimeMetricsFile.Rd                        | 100 ------------
 man/TrimmerMetricsFile.Rd                     | 100 ------------
 man/VCMetricsFile.Rd                          |  94 -----------
 man/WgsContigMeanCovFile.Rd                   | 146 ------------------
 man/WgsCoverageMetricsFile.Rd                 | 105 -------------
 man/WgsFineHistFile.Rd                        | 136 ----------------
 man/WgsHistFile.Rd                            |  94 -----------
 man/dragen_contig_mean_coverage_plot.Rd       |  23 +++
 ...ist_plot.Rd => dragen_fraglenhist_plot.Rd} |  10 +-
 man/time_metrics_process.Rd                   |  30 ----
 .../test-roxytest-testexamples-dragen.R       |  13 --
 16 files changed, 28 insertions(+), 1341 deletions(-)
 delete mode 100644 man/FastqcMetricsFile.Rd
 delete mode 100644 man/FragmentLengthHistFile.Rd
 delete mode 100644 man/MappingMetricsFile.Rd
 delete mode 100644 man/ReplayFile.Rd
 delete mode 100644 man/SvMetricsFile.Rd
 delete mode 100644 man/TimeMetricsFile.Rd
 delete mode 100644 man/TrimmerMetricsFile.Rd
 delete mode 100644 man/VCMetricsFile.Rd
 delete mode 100644 man/WgsContigMeanCovFile.Rd
 delete mode 100644 man/WgsCoverageMetricsFile.Rd
 delete mode 100644 man/WgsFineHistFile.Rd
 delete mode 100644 man/WgsHistFile.Rd
 create mode 100644 man/dragen_contig_mean_coverage_plot.Rd
 rename man/{tso_fraglenhist_plot.Rd => dragen_fraglenhist_plot.Rd} (66%)
 delete mode 100644 man/time_metrics_process.Rd
 delete mode 100644 tests/testthat/test-roxytest-testexamples-dragen.R

diff --git a/man/FastqcMetricsFile.Rd b/man/FastqcMetricsFile.Rd
deleted file mode 100644
index 301bce3..0000000
--- a/man/FastqcMetricsFile.Rd
+++ /dev/null
@@ -1,93 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen_fastqc.R
-\name{FastqcMetricsFile}
-\alias{FastqcMetricsFile}
-\title{FastqcMetrics R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{fastqc_metrics.csv} file output from DRAGEN.
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{FastqcMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-FastqcMetricsFile-read}{\code{FastqcMetricsFile$read()}}
-\item \href{#method-FastqcMetricsFile-write}{\code{FastqcMetricsFile$write()}}
-\item \href{#method-FastqcMetricsFile-clone}{\code{FastqcMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FastqcMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-FastqcMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{fastqc_metrics.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FastqcMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble. TODO.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FastqcMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-FastqcMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{fastqc_metrics.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FastqcMetricsFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FastqcMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-FastqcMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FastqcMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/FragmentLengthHistFile.Rd b/man/FragmentLengthHistFile.Rd
deleted file mode 100644
index 07c9081..0000000
--- a/man/FragmentLengthHistFile.Rd
+++ /dev/null
@@ -1,131 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{FragmentLengthHistFile}
-\alias{FragmentLengthHistFile}
-\title{FragmentLengthHistFile R6 Class}
-\description{
-Contains methods for reading and plotting contents of
-the \code{fragment_length_hist.csv} file output from DRAGEN.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.fragment_length_hist.csv.gz", package = "dracarys")
-fl <- FragmentLengthHistFile$new(x)
-d <- fl$read() # or read(fl)
-fl$plot(d) # or plot(fl)
-fl$write(d |> dplyr::filter(count > 10), out_dir = tempdir(), prefix = "seqc_fl")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{FragmentLengthHistFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-FragmentLengthHistFile-read}{\code{FragmentLengthHistFile$read()}}
-\item \href{#method-FragmentLengthHistFile-write}{\code{FragmentLengthHistFile$write()}}
-\item \href{#method-FragmentLengthHistFile-plot}{\code{FragmentLengthHistFile$plot()}}
-\item \href{#method-FragmentLengthHistFile-clone}{\code{FragmentLengthHistFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FragmentLengthHistFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-FragmentLengthHistFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{fragment_length_hist.csv} file, which contains the
-fragment length distribution for each sample.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FragmentLengthHistFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-A tibble with the following columns:
-\itemize{
-\item sample: name of sample
-\item fragmentLength: estimated fragment length
-\item count: number of reads with estimated fragment length
-}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FragmentLengthHistFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-FragmentLengthHistFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{fragment_length_hist.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FragmentLengthHistFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FragmentLengthHistFile-plot"></a>}}
-\if{latex}{\out{\hypertarget{method-FragmentLengthHistFile-plot}{}}}
-\subsection{Method \code{plot()}}{
-Plots the fragment length distributions as given in the
-\code{fragment_length_hist.csv} file.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FragmentLengthHistFile$plot(d, min_count = 10)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{min_count}}{Minimum read count to be plotted (Default: 10).}
-}
-\if{html}{\out{</div>}}
-}
-\subsection{Returns}{
-A ggplot2 plot containing fragment lengths on X axis and read counts
-on Y axis for each sample.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-FragmentLengthHistFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-FragmentLengthHistFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{FragmentLengthHistFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/MappingMetricsFile.Rd b/man/MappingMetricsFile.Rd
deleted file mode 100644
index 62d5cfb..0000000
--- a/man/MappingMetricsFile.Rd
+++ /dev/null
@@ -1,106 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{MappingMetricsFile}
-\alias{MappingMetricsFile}
-\title{MappingMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{mapping_metrics.csv} file output from DRAGEN.
-This file contains mapping and aligning metrics, like the metrics computed by
-the Samtools Flagstat command. These metrics are available on an aggregate
-level (over all input data), and on a per read group level. NOTE: we are
-keeping only the read group level metrics (i.e. removing the aggregate data).
-Unless explicitly stated, the metrics units are in reads (i.e., not in
-terms of pairs or alignments).
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.mapping_metrics.csv.gz", package = "dracarys")
-mm <- MappingMetricsFile$new(x)
-d <- mm$read() # or read(mm)
-mm$write(d, out_dir = tempdir(), prefix = "seqc_mm", out_format = "tsv")
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{MappingMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-MappingMetricsFile-read}{\code{MappingMetricsFile$read()}}
-\item \href{#method-MappingMetricsFile-write}{\code{MappingMetricsFile$write()}}
-\item \href{#method-MappingMetricsFile-clone}{\code{MappingMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-MappingMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-MappingMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{mapping_metrics.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{MappingMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row of X metrics per read group.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-MappingMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-MappingMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{mapping_metrics.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{MappingMetricsFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-MappingMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-MappingMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{MappingMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/ReplayFile.Rd b/man/ReplayFile.Rd
deleted file mode 100644
index c2c34eb..0000000
--- a/man/ReplayFile.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{ReplayFile}
-\alias{ReplayFile}
-\title{ReplayFile R6 Class}
-\description{
-Contains methods for reading contents of
-the \code{replay.json} file output from DRAGEN, which contains the DRAGEN command
-line, parameters and version for the specific run.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II-replay.json.gz", package = "dracarys")
-r <- ReplayFile$new(x)
-d <- r$read() # or read(r)
-r$write(d, out_dir = tempdir(), prefix = "seqc_replay", out_format = "tsv")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{ReplayFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-ReplayFile-read}{\code{ReplayFile$read()}}
-\item \href{#method-ReplayFile-write}{\code{ReplayFile$write()}}
-\item \href{#method-ReplayFile-clone}{\code{ReplayFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-ReplayFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-ReplayFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{replay.json} file.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{ReplayFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-ReplayFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-ReplayFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{replay.json} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{ReplayFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-ReplayFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-ReplayFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{ReplayFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/SvMetricsFile.Rd b/man/SvMetricsFile.Rd
deleted file mode 100644
index e5c775d..0000000
--- a/man/SvMetricsFile.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{SvMetricsFile}
-\alias{SvMetricsFile}
-\title{SvMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{sv_metrics.csv} file output from DRAGEN
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.sv_metrics.csv.gz", package = "dracarys")
-sv <- SvMetricsFile$new(x)
-d <- sv$read()
-sv$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv")
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{SvMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-SvMetricsFile-read}{\code{SvMetricsFile$read()}}
-\item \href{#method-SvMetricsFile-write}{\code{SvMetricsFile$write()}}
-\item \href{#method-SvMetricsFile-clone}{\code{SvMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-SvMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-SvMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{sv_metrics.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{SvMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-SvMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-SvMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{sv_metrics.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{SvMetricsFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-SvMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-SvMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{SvMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/TimeMetricsFile.Rd b/man/TimeMetricsFile.Rd
deleted file mode 100644
index 43b463d..0000000
--- a/man/TimeMetricsFile.Rd
+++ /dev/null
@@ -1,100 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{TimeMetricsFile}
-\alias{TimeMetricsFile}
-\title{TimeMetricsFile R6 Class}
-\description{
-Contains methods for reading contents of
-the \code{time_metrics.csv} file output from DRAGEN, which contains
-a breakdown of the run duration for each DRAGEN process.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
-tm <- TimeMetricsFile$new(x)
-d <- tm$read() # or read(tm)
-tm$write(d, out_dir = tempdir(), prefix = "seqc_time", out_format = "tsv")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{TimeMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-TimeMetricsFile-read}{\code{TimeMetricsFile$read()}}
-\item \href{#method-TimeMetricsFile-write}{\code{TimeMetricsFile$write()}}
-\item \href{#method-TimeMetricsFile-clone}{\code{TimeMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TimeMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-TimeMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{time_metrics.csv} file.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TimeMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TimeMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-TimeMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{time_metrics.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TimeMetricsFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TimeMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-TimeMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TimeMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/TrimmerMetricsFile.Rd b/man/TrimmerMetricsFile.Rd
deleted file mode 100644
index a3b4fe8..0000000
--- a/man/TrimmerMetricsFile.Rd
+++ /dev/null
@@ -1,100 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{TrimmerMetricsFile}
-\alias{TrimmerMetricsFile}
-\title{TrimmerMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{trimmer_metrics.csv} file output from DRAGEN
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.trimmer_metrics.csv.gz", package = "dracarys")
-tm <- TrimmerMetricsFile$new(x)
-d <- tm$read()
-tm$write(d, out_dir = tempdir(), prefix = "seqc_tm", out_format = "tsv")
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{TrimmerMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-TrimmerMetricsFile-read}{\code{TrimmerMetricsFile$read()}}
-\item \href{#method-TrimmerMetricsFile-write}{\code{TrimmerMetricsFile$write()}}
-\item \href{#method-TrimmerMetricsFile-clone}{\code{TrimmerMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TrimmerMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-TrimmerMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{trimmer_metrics.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TrimmerMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TrimmerMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-TrimmerMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{trimmer_metrics.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TrimmerMetricsFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-TrimmerMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-TrimmerMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{TrimmerMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/VCMetricsFile.Rd b/man/VCMetricsFile.Rd
deleted file mode 100644
index 3011160..0000000
--- a/man/VCMetricsFile.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{VCMetricsFile}
-\alias{VCMetricsFile}
-\title{VCMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{vc_metrics.csv} file output from DRAGEN, which contains variant calling metrics.
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.vc_metrics.csv.gz", package = "dracarys")
-vm <- VCMetricsFile$new(x)
-d <- vm$read() # or read(vm)
-vm$write(d, out_dir = tempdir(), prefix = "seqc_vc", out_format = "tsv")
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{VCMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-VCMetricsFile-read}{\code{VCMetricsFile$read()}}
-\item \href{#method-VCMetricsFile-write}{\code{VCMetricsFile$write()}}
-\item \href{#method-VCMetricsFile-clone}{\code{VCMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-VCMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-VCMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{vc_metrics.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{VCMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-VCMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-VCMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{vc_metrics.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{VCMetricsFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-VCMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-VCMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{VCMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/WgsContigMeanCovFile.Rd b/man/WgsContigMeanCovFile.Rd
deleted file mode 100644
index 49b974b..0000000
--- a/man/WgsContigMeanCovFile.Rd
+++ /dev/null
@@ -1,146 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{WgsContigMeanCovFile}
-\alias{WgsContigMeanCovFile}
-\title{WgsContigMeanCovFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \verb{wgs_contig_mean_cov_<phenotype>.csv} file output from DRAGEN.
-This file contains the estimated coverage for all contigs, and an autosomal
-estimated coverage.
-}
-\examples{
-x1 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_normal.csv.gz", package = "dracarys")
-x2 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_tumor.csv.gz", package = "dracarys")
-cc1 <- WgsContigMeanCovFile$new(x1)
-cc2 <- WgsContigMeanCovFile$new(x2)
-d1 <- cc1$read()
-d2 <- cc2$read()
-cc1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv")
-cc2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv")
-
-cc1$plot(d1)
-cc2$plot(d2)
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsContigMeanCovFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-WgsContigMeanCovFile-read}{\code{WgsContigMeanCovFile$read()}}
-\item \href{#method-WgsContigMeanCovFile-write}{\code{WgsContigMeanCovFile$write()}}
-\item \href{#method-WgsContigMeanCovFile-plot}{\code{WgsContigMeanCovFile$plot()}}
-\item \href{#method-WgsContigMeanCovFile-clone}{\code{WgsContigMeanCovFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsContigMeanCovFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \verb{wgs_contig_mean_cov_<phenotype>.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsContigMeanCovFile$read(keep_alt = TRUE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{keep_alt}}{Keep the ALT + Mito chromosomes?}
-}
-\if{html}{\out{</div>}}
-}
-\subsection{Returns}{
-tibble with the following columns:
-\itemize{
-\item label: file label.
-\item chrom: contig name.
-\item n_bases: number of bases aligned to contig (excludes bases from
-duplicate marked reads, reads with MAPQ=0, and clipped bases).
-\item coverage: col2 / contig length
-}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsContigMeanCovFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \verb{wgs_contig_mean_cov_<phenotype>.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsContigMeanCovFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsContigMeanCovFile-plot"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-plot}{}}}
-\subsection{Method \code{plot()}}{
-Plots the \verb{wgs_contig_mean_cov_<phenotype>.csv} files.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsContigMeanCovFile$plot(d, top_alt_n = 15)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{top_alt_n}}{Number of top covered alt contigs to plot per phenotype.}
-}
-\if{html}{\out{</div>}}
-}
-\subsection{Returns}{
-A ggplot2 object with chromosomes on X axis, and coverage on Y axis.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsContigMeanCovFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsContigMeanCovFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsContigMeanCovFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/WgsCoverageMetricsFile.Rd b/man/WgsCoverageMetricsFile.Rd
deleted file mode 100644
index 322ce83..0000000
--- a/man/WgsCoverageMetricsFile.Rd
+++ /dev/null
@@ -1,105 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{WgsCoverageMetricsFile}
-\alias{WgsCoverageMetricsFile}
-\title{WgsCoverageMetricsFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \verb{wgs_coverage_metrics_<phenotype>.csv} file output from DRAGEN.
-This file contains read depth of coverage metrics.
-}
-\examples{
-x1 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_normal.csv.gz", package = "dracarys")
-x2 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_tumor.csv.gz", package = "dracarys")
-cm1 <- WgsCoverageMetricsFile$new(x1)
-cm2 <- WgsCoverageMetricsFile$new(x2)
-d1 <- read(cm1)
-d2 <- read(cm2)
-cm1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv")
-cm2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv")
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsCoverageMetricsFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-WgsCoverageMetricsFile-read}{\code{WgsCoverageMetricsFile$read()}}
-\item \href{#method-WgsCoverageMetricsFile-write}{\code{WgsCoverageMetricsFile$write()}}
-\item \href{#method-WgsCoverageMetricsFile-clone}{\code{WgsCoverageMetricsFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsCoverageMetricsFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsCoverageMetricsFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \verb{wgs_coverage_metrics_<phenotype>.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsCoverageMetricsFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsCoverageMetricsFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsCoverageMetricsFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \verb{wgs_coverage_metrics_<phenotype>.csv} file output
-from DRAGEN
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsCoverageMetricsFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsCoverageMetricsFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsCoverageMetricsFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsCoverageMetricsFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/WgsFineHistFile.Rd b/man/WgsFineHistFile.Rd
deleted file mode 100644
index b3f5407..0000000
--- a/man/WgsFineHistFile.Rd
+++ /dev/null
@@ -1,136 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{WgsFineHistFile}
-\alias{WgsFineHistFile}
-\title{WgsFineHistFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \verb{wgs_fine_hist_<phenotype>.csv} file output from DRAGEN.
-This file contains two columns: Depth and Overall.
-The value in the Depth column ranges from 0 to 1000+ and the Overall
-column indicates the number of loci covered at the corresponding depth.
-}
-\examples{
-x1 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_normal.csv.gz", package = "dracarys")
-x2 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_tumor.csv.gz", package = "dracarys")
-ch1 <- WgsFineHistFile$new(x1)
-ch2 <- WgsFineHistFile$new(x2)
-d1 <- read(ch1)
-d2 <- read(ch2)
-ch1$plot(d1)
-ch2$plot(d2)
-ch1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv")
-ch2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv")
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsFineHistFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-WgsFineHistFile-read}{\code{WgsFineHistFile$read()}}
-\item \href{#method-WgsFineHistFile-write}{\code{WgsFineHistFile$write()}}
-\item \href{#method-WgsFineHistFile-plot}{\code{WgsFineHistFile$plot()}}
-\item \href{#method-WgsFineHistFile-clone}{\code{WgsFineHistFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsFineHistFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsFineHistFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \verb{wgs_fine_hist_<phenotype>.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsFineHistFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with following columns:
-\itemize{
-\item depth
-\item number of loci with given depth
-}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsFineHistFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsFineHistFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \verb{wgs_fine_hist_<phenotype>.csv} file output
-from DRAGEN
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsFineHistFile$write(
-  d,
-  out_dir = NULL,
-  prefix,
-  out_format = "tsv",
-  drid = NULL
-)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsFineHistFile-plot"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsFineHistFile-plot}{}}}
-\subsection{Method \code{plot()}}{
-Plots the \verb{wgs_fine_hist_<phenotype>.csv} files.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsFineHistFile$plot(d, x_lim = c(0, 300))}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{x_lim}}{X axis range to plot.}
-}
-\if{html}{\out{</div>}}
-}
-\subsection{Returns}{
-A ggplot2 object with depth of coverage on X axis,
-and number of loci with that depth on Y axis.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsFineHistFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsFineHistFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsFineHistFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/WgsHistFile.Rd b/man/WgsHistFile.Rd
deleted file mode 100644
index 0fecd69..0000000
--- a/man/WgsHistFile.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{WgsHistFile}
-\alias{WgsHistFile}
-\title{WgsHistFile R6 Class}
-\description{
-Contains methods for reading and displaying contents of
-the \code{wgs_hist.csv} file output from DRAGEN
-}
-\examples{
-x <- system.file("extdata/wgs/SEQC-II.wgs_hist.csv.gz", package = "dracarys")
-h <- WgsHistFile$new(x)
-d <- h$read()
-h$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv")
-
-}
-\section{Super class}{
-\code{\link[dracarys:File]{dracarys::File}} -> \code{WgsHistFile}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-WgsHistFile-read}{\code{WgsHistFile$read()}}
-\item \href{#method-WgsHistFile-write}{\code{WgsHistFile$write()}}
-\item \href{#method-WgsHistFile-clone}{\code{WgsHistFile$clone()}}
-}
-}
-\if{html}{\out{
-<details open><summary>Inherited methods</summary>
-<ul>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="bname"><a href='../../dracarys/html/File.html#method-File-bname'><code>dracarys::File$bname()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="initialize"><a href='../../dracarys/html/File.html#method-File-initialize'><code>dracarys::File$initialize()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="print"><a href='../../dracarys/html/File.html#method-File-print'><code>dracarys::File$print()</code></a></span></li>
-<li><span class="pkg-link" data-pkg="dracarys" data-topic="File" data-id="type"><a href='../../dracarys/html/File.html#method-File-type'><code>dracarys::File$type()</code></a></span></li>
-</ul>
-</details>
-}}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsHistFile-read"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsHistFile-read}{}}}
-\subsection{Method \code{read()}}{
-Reads the \code{wgs_hist.csv} file output from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsHistFile$read()}\if{html}{\out{</div>}}
-}
-
-\subsection{Returns}{
-tibble with one row and metrics spread across individual columns.
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsHistFile-write"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsHistFile-write}{}}}
-\subsection{Method \code{write()}}{
-Writes a tidy version of the \code{wgs_hist.csv} file output
-from DRAGEN.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsHistFile$write(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{d}}{Parsed object from \code{self$read()}.}
-
-\item{\code{out_dir}}{Output directory.}
-
-\item{\code{prefix}}{Prefix of output file(s).}
-
-\item{\code{out_format}}{Format of output file(s).}
-
-\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-WgsHistFile-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-WgsHistFile-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{WgsHistFile$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
diff --git a/man/dragen_contig_mean_coverage_plot.Rd b/man/dragen_contig_mean_coverage_plot.Rd
new file mode 100644
index 0000000..2a98030
--- /dev/null
+++ b/man/dragen_contig_mean_coverage_plot.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dragen.R
+\name{dragen_contig_mean_coverage_plot}
+\alias{dragen_contig_mean_coverage_plot}
+\title{Plot DRAGEN Contig Mean Coverage
+TODO
+Plots the \verb{wgs_contig_mean_cov_<phenotype>.csv} files.}
+\usage{
+dragen_contig_mean_coverage_plot(d, top_alt_n = 15)
+}
+\arguments{
+\item{d}{Parsed tibble.}
+
+\item{top_alt_n}{Number of top covered alt contigs to plot per phenotype.}
+}
+\value{
+A ggplot2 object with chromosomes on X axis, and coverage on Y axis.
+}
+\description{
+Plot DRAGEN Contig Mean Coverage
+TODO
+Plots the \verb{wgs_contig_mean_cov_<phenotype>.csv} files.
+}
diff --git a/man/tso_fraglenhist_plot.Rd b/man/dragen_fraglenhist_plot.Rd
similarity index 66%
rename from man/tso_fraglenhist_plot.Rd
rename to man/dragen_fraglenhist_plot.Rd
index 7a6daad..e046df2 100644
--- a/man/tso_fraglenhist_plot.Rd
+++ b/man/dragen_fraglenhist_plot.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tso.R
-\name{tso_fraglenhist_plot}
-\alias{tso_fraglenhist_plot}
-\title{Plot Fragment Length Hist}
+% Please edit documentation in R/dragen.R
+\name{dragen_fraglenhist_plot}
+\alias{dragen_fraglenhist_plot}
+\title{DRAGEN Fragment Length Hist Plot}
 \usage{
-tso_fraglenhist_plot(d, min_count = 10)
+dragen_fraglenhist_plot(d, min_count = 10)
 }
 \arguments{
 \item{d}{Parsed tibble.}
diff --git a/man/time_metrics_process.Rd b/man/time_metrics_process.Rd
deleted file mode 100644
index 91b6b20..0000000
--- a/man/time_metrics_process.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dragen.R
-\name{time_metrics_process}
-\alias{time_metrics_process}
-\title{Process Multiple TimeMetricsFile Objects}
-\usage{
-time_metrics_process(x, id = seq_len(length(x)))
-}
-\arguments{
-\item{x}{Atomic vector with one or more TimeMetricsFile objects.}
-
-\item{id}{ID for each input, which is used to disambiguate files
-generated from same samples. Default: index from 1 to length of \code{x}.}
-}
-\value{
-tibble with the following columns:
-\itemize{
-\item Step: DRAGEN step
-\item Time: time in HH:MM
-}
-}
-\description{
-Processes multiple TimeMetricsFile objects.
-}
-\examples{
-p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
-x <- TimeMetricsFile$new(p)
-(tm <- time_metrics_process(c(x, x), id = c("run1", "run2")))
-
-}
diff --git a/tests/testthat/test-roxytest-testexamples-dragen.R b/tests/testthat/test-roxytest-testexamples-dragen.R
deleted file mode 100644
index 8db3f7e..0000000
--- a/tests/testthat/test-roxytest-testexamples-dragen.R
+++ /dev/null
@@ -1,13 +0,0 @@
-# Generated by roxytest: do not edit by hand!
-
-# File R/dragen.R: @testexamples
-
-test_that("Function time_metrics_process() @ L1399", {
-  
-  p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
-  x <- TimeMetricsFile$new(p)
-  (tm <- time_metrics_process(c(x, x), id = c("run1", "run2")))
-  
-  expect_equal(nrow(tm), 2)
-})
-

From 91d30162e90e398104726ce07d0835f6de9efae8 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 01:50:41 +1100
Subject: [PATCH 14/17] dragen: remove per-file R6 classes (R/)

---
 NAMESPACE         |   13 -
 R/dragen.R        | 1123 +++++----------------------------------------
 R/dragen_fastqc.R |   48 --
 R/tso.R           |   28 --
 4 files changed, 104 insertions(+), 1108 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 8350259..6239a8d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,19 +4,11 @@ S3method(read,File)
 export(BcftoolsStatsFile)
 export(BclconvertReports)
 export(BclconvertReports375)
-export(FastqcMetricsFile)
 export(File)
-export(FragmentLengthHistFile)
-export(MappingMetricsFile)
 export(MultiqcFile)
 export(PcgrJsonFile)
 export(PcgrTiersFile)
 export(PloidyEstimationMetricsFile)
-export(ReplayFile)
-export(SvMetricsFile)
-export(TimeMetricsFile)
-export(TrimmerMetricsFile)
-export(VCMetricsFile)
 export(Wf)
 export(Wf_dragen)
 export(Wf_sash)
@@ -25,10 +17,6 @@ export(Wf_tso_ctdna_tumor_only)
 export(Wf_tso_ctdna_tumor_only_v2)
 export(Wf_umccrise)
 export(Wf_umccrise_download_tidy_write)
-export(WgsContigMeanCovFile)
-export(WgsCoverageMetricsFile)
-export(WgsFineHistFile)
-export(WgsHistFile)
 export(bcftools_parse_vcf)
 export(bcftools_parse_vcf_regions)
 export(date_log)
@@ -79,7 +67,6 @@ export(s3_list_files_filter_relevant)
 export(s3_search)
 export(session_info_kable)
 export(tidy_files)
-export(time_metrics_process)
 export(tso_acfc_plot)
 export(tso_acfc_read)
 export(tso_combinedvaro_smallv_read)
diff --git a/R/dragen.R b/R/dragen.R
index 42811f9..90cfd8b 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1,3 +1,32 @@
+#' DRAGEN Fragment Length Hist Plot
+#'
+#' Plots the fragment length distributions as given in the
+#' `fragment_length_hist` file.
+#'
+#' @param d Parsed tibble.
+#' @param min_count Minimum read count to be plotted (def: 10).
+#'
+#' @return A ggplot2 plot containing fragment lengths on X axis and read counts
+#' on Y axis for each sample.
+dragen_fraglenhist_plot <- function(d, min_count = 10) {
+  assertthat::assert_that(is.numeric(min_count), min_count >= 0)
+  d |>
+    dplyr::filter(.data$Count >= min_count) |>
+    ggplot2::ggplot(ggplot2::aes(x = .data$FragmentLength, y = .data$Count)) +
+    ggplot2::geom_line() +
+    ggplot2::labs(title = "Fragment Length Distribution") +
+    ggplot2::xlab("Fragment Length (bp)") +
+    ggplot2::ylab(glue("Read Count (min: {min_count})")) +
+    ggplot2::theme_minimal() +
+    ggplot2::theme(
+      legend.position = c(0.9, 0.9),
+      legend.justification = c(1, 1),
+      panel.grid.minor = ggplot2::element_blank(),
+      panel.grid.major = ggplot2::element_blank(),
+      plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold")
+    )
+}
+
 #' Read DRAGEN UMI Metrics
 #'
 #' Reads the `umi_metrics.csv` file output from DRAGEN.
@@ -608,577 +637,87 @@ dragen_coverage_metrics_read <- function(x) {
   return(res)
 }
 
-#' WgsContigMeanCovFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `wgs_contig_mean_cov_<phenotype>.csv` file output from DRAGEN.
-#' This file contains the estimated coverage for all contigs, and an autosomal
-#' estimated coverage.
-#'
-#' @examples
-#' x1 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_normal.csv.gz", package = "dracarys")
-#' x2 <- system.file("extdata/wgs/SEQC-II.wgs_contig_mean_cov_tumor.csv.gz", package = "dracarys")
-#' cc1 <- WgsContigMeanCovFile$new(x1)
-#' cc2 <- WgsContigMeanCovFile$new(x2)
-#' d1 <- cc1$read()
-#' d2 <- cc2$read()
-#' cc1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv")
-#' cc2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv")
-#'
-#' cc1$plot(d1)
-#' cc2$plot(d2)
-#'
-#' @export
-WgsContigMeanCovFile <- R6::R6Class(
-  "WgsContigMeanCovFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `wgs_contig_mean_cov_<phenotype>.csv` file output from DRAGEN.
-    #'
-    #' @param keep_alt Keep the ALT + Mito chromosomes?
-    #' @return tibble with the following columns:
-    #'   - label: file label.
-    #'   - chrom: contig name.
-    #'   - n_bases: number of bases aligned to contig (excludes bases from
-    #'   duplicate marked reads, reads with MAPQ=0, and clipped bases).
-    #'   - coverage: col2 / contig length
-    read = function(keep_alt = TRUE) {
-      x <- self$path
-      readr::read_csv(x, col_names = c("chrom", "n_bases", "coverage"), col_types = "cdd") |>
-        dplyr::filter(
-          if (!keep_alt) {
-            !grepl("chrM|MT|_|Autosomal|HLA-|EBV", .data$chrom)
-          } else {
-            TRUE
-          }
-        )
-    },
-
-    #' @description
-    #' Writes a tidy version of the `wgs_contig_mean_cov_<phenotype>.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    },
-
-
-    #' @description Plots the `wgs_contig_mean_cov_<phenotype>.csv` files.
-    #' @param d Parsed object from `self$read()`.
-    #' @param top_alt_n Number of top covered alt contigs to plot per phenotype.
-    #' @return A ggplot2 object with chromosomes on X axis, and coverage on Y axis.
-    plot = function(d, top_alt_n = 15) {
-      assertthat::assert_that(top_alt_n >= 0)
-
-      # Display chr1-22, X, Y at top (M goes to bottom).
-      # Display top 20 of the rest, plus rest as 'other', at bottom
-      main_chrom1 <- c(1:22, "X", "Y")
-      main_chrom2 <- c(paste0("chr", main_chrom1))
-      main_chrom <- c(main_chrom1, main_chrom2, "Autosomal regions")
-      min_cvg <- 0.000001
-
-      d <- d |>
-        dplyr::mutate(
-          panel = dplyr::if_else(.data$chrom %in% main_chrom, "main", "alt"),
-          panel = factor(.data$panel, levels = c("main", "alt"))
-        ) |>
-        dplyr::select("chrom", "coverage", "panel")
-
-      main_panel <- d |> dplyr::filter(.data$panel == "main")
-      alt_panel <- d |> dplyr::filter(.data$panel == "alt")
-      top_alt <- alt_panel |>
-        dplyr::top_n(top_alt_n, wt = .data$coverage) |>
-        dplyr::arrange(dplyr::desc(.data$coverage)) |>
-        dplyr::pull(.data$chrom) |>
-        unique()
-
-      alt_panel2 <- alt_panel |>
-        dplyr::mutate(alt_group = dplyr::if_else(.data$chrom %in% top_alt, "top", "bottom"))
-
-      alt_panel_final <- alt_panel2 |>
-        dplyr::group_by(.data$alt_group) |>
-        dplyr::summarise(mean_cov = mean(.data$coverage)) |>
-        dplyr::inner_join(alt_panel2, by = c("alt_group")) |>
-        dplyr::mutate(
-          chrom = dplyr::if_else(.data$alt_group == "bottom", "OTHER", .data$chrom),
-          coverage = dplyr::if_else(.data$alt_group == "bottom", .data$mean_cov, .data$coverage)
-        ) |>
-        dplyr::distinct() |>
-        dplyr::filter(coverage > min_cvg) |>
-        dplyr::ungroup() |>
-        dplyr::select("chrom", "coverage", "panel")
-
-      chrom_fac_levels <- c(main_chrom, "chrM", "MT", top_alt[!top_alt %in% c("chrM", "MT")], "OTHER")
-      d <- dplyr::bind_rows(main_panel, alt_panel_final) |>
-        dplyr::mutate(chrom = factor(.data$chrom, levels = chrom_fac_levels))
-
-      d |>
-        dplyr::mutate(label = "sampleA") |>
-        ggplot2::ggplot(
-          ggplot2::aes(
-            x = .data$chrom, y = .data$coverage, group = .data$label,
-          )
-        ) +
-        ggplot2::geom_point() +
-        ggplot2::geom_line() +
-        ggplot2::scale_y_continuous(
-          limits = c(0, NA), expand = c(0, 0), labels = scales::comma,
-          breaks = scales::pretty_breaks(n = 8)
-        ) +
-        ggplot2::theme_minimal() +
-        ggplot2::labs(title = "Mean Coverage Per Chromosome", colour = "Label") +
-        ggplot2::xlab("Chromosome") +
-        ggplot2::ylab("Coverage") +
-        ggplot2::theme(
-          legend.position = "top",
-          panel.grid.minor = ggplot2::element_blank(),
-          panel.grid.major.y = ggplot2::element_blank(),
-          strip.background = ggplot2::element_blank(),
-          strip.text.x = ggplot2::element_blank(),
-          axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1, size = 6),
-          plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold"),
-          panel.spacing = ggplot2::unit(2, "lines")
-        ) +
-        ggplot2::facet_wrap(ggplot2::vars(.data$panel), nrow = 2, scales = "free")
-    }
-  )
-)
-
-#' WgsCoverageMetricsFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `wgs_coverage_metrics_<phenotype>.csv` file output from DRAGEN.
-#' This file contains read depth of coverage metrics.
-#'
-#' @examples
-#' x1 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_normal.csv.gz", package = "dracarys")
-#' x2 <- system.file("extdata/wgs/SEQC-II.wgs_coverage_metrics_tumor.csv.gz", package = "dracarys")
-#' cm1 <- WgsCoverageMetricsFile$new(x1)
-#' cm2 <- WgsCoverageMetricsFile$new(x2)
-#' d1 <- read(cm1)
-#' d2 <- read(cm2)
-#' cm1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv")
-#' cm2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv")
-#'
-#' @export
-WgsCoverageMetricsFile <- R6::R6Class(
-  "WgsCoverageMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `wgs_coverage_metrics_<phenotype>.csv` file output from DRAGEN.
-    #'
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      abbrev_nm <- c(
-        "Aligned bases"                                       = "bases_aligned",
-        "Aligned bases in genome"                             = "bases_aligned_in_genome",
-        "Average alignment coverage over genome"              = "cov_alignment_avg_over_genome",
-        "Uniformity of coverage (PCT > 0.2*mean) over genome" = "cov_uniformity_over_genome_pct_gt02mean",
-        "Uniformity of coverage (PCT > 0.4*mean) over genome" = "cov_uniformity_over_genome_pct_gt04mean",
-        "Average chr X coverage over genome"                  = "cov_avg_x_over_genome",
-        "Average chr Y coverage over genome"                  = "cov_avg_y_over_genome",
-        "Average mitochondrial coverage over genome"          = "cov_avg_mt_over_genome",
-        "Average autosomal coverage over genome"              = "cov_avg_auto_over_genome",
-        "Median autosomal coverage over genome"               = "cov_median_auto_over_genome",
-        "Mean/Median autosomal coverage ratio over genome"    = "cov_mean_median_auto_ratio_over_genome",
-        "Aligned reads"                                       = "reads_aligned",
-        "Aligned reads in genome"                             = "reads_aligned_in_genome"
-      )
-
-      x <- self$path
-      raw <- readr::read_lines(x)
-      assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1]))
-
-      res <- raw |>
-        tibble::as_tibble_col(column_name = "value") |>
-        tidyr::separate_wider_delim(
-          "value",
-          delim = ",", too_few = "align_start",
-          names = c("category", "dummy1", "var", "value", "pct")
-        )
-      # split to rename the
-      # "PCT of genome with coverage [100x: inf)" values
-      res1 <- res |>
-        # pct just shows 100% for a couple rows
-        dplyr::filter(!grepl("PCT of genome with coverage", .data$var)) |>
-        dplyr::select("var", "value")
-      res2 <- res |>
-        dplyr::filter(grepl("PCT of genome with coverage", .data$var)) |>
-        dplyr::mutate(
-          var = sub("PCT of genome with coverage ", "", .data$var),
-          var = gsub("\\[|\\]|\\(|\\)| ", "", .data$var),
-          var = gsub("x", "", .data$var),
-          var = gsub("inf", "Inf", .data$var)
-        ) |>
-        tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
-        dplyr::mutate(var = as.character(glue("cov_genome_pct_{start}_{end}"))) |>
-        dplyr::select("var", "value")
-      res <- dplyr::bind_rows(res1, res2) |>
-        dplyr::mutate(
-          value = dplyr::na_if(.data$value, "NA"),
-          value = as.numeric(.data$value),
-          var = dplyr::recode(.data$var, !!!abbrev_nm)
-        ) |>
-        tidyr::pivot_wider(names_from = "var", values_from = "value")
-    },
-    #' @description
-    #' Writes a tidy version of the `wgs_coverage_metrics_<phenotype>.csv` file output
-    #' from DRAGEN
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
-#' WgsFineHistFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `wgs_fine_hist_<phenotype>.csv` file output from DRAGEN.
-#' This file contains two columns: Depth and Overall.
-#' The value in the Depth column ranges from 0 to 1000+ and the Overall
-#' column indicates the number of loci covered at the corresponding depth.
-#'
-#' @examples
-#' x1 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_normal.csv.gz", package = "dracarys")
-#' x2 <- system.file("extdata/wgs/SEQC-II.wgs_fine_hist_tumor.csv.gz", package = "dracarys")
-#' ch1 <- WgsFineHistFile$new(x1)
-#' ch2 <- WgsFineHistFile$new(x2)
-#' d1 <- read(ch1)
-#' d2 <- read(ch2)
-#' ch1$plot(d1)
-#' ch2$plot(d2)
-#' ch1$write(d1, out_dir = tempdir(), prefix = "seqc_n", out_format = "tsv")
-#' ch2$write(d2, out_dir = tempdir(), prefix = "seqc_t", out_format = "tsv")
-#' @export
-WgsFineHistFile <- R6::R6Class(
-  "WgsFineHistFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `wgs_fine_hist_<phenotype>.csv` file output from DRAGEN.
-    #' @return tibble with following columns:
-    #'   - depth
-    #'   - number of loci with given depth
-    read = function() {
-      x <- self$path
-      d <- readr::read_csv(x, col_types = "cd")
-      assertthat::assert_that(all(colnames(d) == c("Depth", "Overall")))
-
-      # there's a max Depth of 2000+, so convert to numeric for easier plotting
-      d |>
-        dplyr::mutate(
-          Depth = ifelse(grepl("+", .data$Depth), sub("(\\d*)\\+", "\\1", .data$Depth), .data$Depth),
-          Depth = as.integer(.data$Depth)
-        ) |>
-        dplyr::select(depth = "Depth", n_loci = "Overall")
-    },
-    #' @description
-    #' Writes a tidy version of the `wgs_fine_hist_<phenotype>.csv` file output
-    #' from DRAGEN
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    },
-
-    #' @description Plots the `wgs_fine_hist_<phenotype>.csv` files.
-    #' @param d Parsed object from `self$read()`.
-    #' @param x_lim X axis range to plot.
-    #' @return A ggplot2 object with depth of coverage on X axis,
-    #' and number of loci with that depth on Y axis.
-    plot = function(d, x_lim = c(0, 300)) {
-      assertthat::assert_that(length(x_lim) == 2)
-      d |>
-        ggplot2::ggplot(ggplot2::aes(x = .data$depth, y = .data$n_loci)) +
-        ggplot2::geom_line() +
-        ggplot2::coord_cartesian(xlim = x_lim) +
-        ggplot2::scale_y_continuous(labels = scales::label_comma()) +
-        ggplot2::scale_x_continuous(n.breaks = 8) +
-        ggplot2::labs(title = "Coverage Distribution", colour = "Label") +
-        ggplot2::xlab("Depth of Coverage") +
-        ggplot2::ylab("Number of Loci with Given Coverage") +
-        ggplot2::theme_minimal() +
-        ggplot2::theme(
-          legend.position = c(0.9, 0.9),
-          legend.justification = c(1, 1),
-          panel.grid.minor = ggplot2::element_blank(),
-          panel.grid.major.y = ggplot2::element_blank(),
-          plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold")
-        )
-    }
-  )
-)
-
-#' FragmentLengthHistFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and plotting contents of
-#' the `fragment_length_hist.csv` file output from DRAGEN.
+#' Plot DRAGEN Contig Mean Coverage
+#' TODO
+#' Plots the `wgs_contig_mean_cov_<phenotype>.csv` files.
 #'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.fragment_length_hist.csv.gz", package = "dracarys")
-#' fl <- FragmentLengthHistFile$new(x)
-#' d <- fl$read() # or read(fl)
-#' fl$plot(d) # or plot(fl)
-#' fl$write(d |> dplyr::filter(count > 10), out_dir = tempdir(), prefix = "seqc_fl")
-#' @export
-FragmentLengthHistFile <- R6::R6Class(
-  "FragmentLengthHistFile",
-  inherit = File,
-  public = list(
-    #' @description Reads the `fragment_length_hist.csv` file, which contains the
-    #' fragment length distribution for each sample.
-    #' @return A tibble with the following columns:
-    #' - sample: name of sample
-    #' - fragmentLength: estimated fragment length
-    #' - count: number of reads with estimated fragment length
-    read = function() {
-      x <- self$path
-      d <- readr::read_lines(x)
-      assertthat::assert_that(grepl("#Sample", d[1]))
-
-      d |>
-        tibble::enframe() |>
-        dplyr::mutate(
-          sample = dplyr::if_else(
-            grepl("#Sample", .data$value),
-            sub("#Sample: (.*)", "\\1", .data$value),
-            NA_character_
-          )
-        ) |>
-        tidyr::fill("sample", .direction = "down") |>
-        dplyr::filter(!grepl("#Sample: |FragmentLength,Count", .data$value)) |>
-        tidyr::separate_wider_delim(cols = "value", names = c("fragmentLength", "count"), delim = ",") |>
-        dplyr::mutate(
-          count = as.numeric(.data$count),
-          fragmentLength = as.numeric(.data$fragmentLength)
-        ) |>
-        dplyr::select("sample", "fragmentLength", "count")
-    },
-    #' @description
-    #' Writes a tidy version of the `fragment_length_hist.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    },
+#' @param d Parsed tibble.
+#' @param top_alt_n Number of top covered alt contigs to plot per phenotype.
+#' @return A ggplot2 object with chromosomes on X axis, and coverage on Y axis.
+dragen_contig_mean_coverage_plot <- function(d, top_alt_n = 15) {
+  assertthat::assert_that(top_alt_n >= 0)
 
+  # Display chr1-22, X, Y at top (M goes to bottom).
+  # Display top 20 of the rest, plus rest as 'other', at bottom
+  main_chrom1 <- c(1:22, "X", "Y")
+  main_chrom2 <- c(paste0("chr", main_chrom1))
+  main_chrom <- c(main_chrom1, main_chrom2, "Autosomal regions")
+  min_cvg <- 0.000001
 
-    #' @description Plots the fragment length distributions as given in the
-    #' `fragment_length_hist.csv` file.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param min_count Minimum read count to be plotted (Default: 10).
-    #' @return A ggplot2 plot containing fragment lengths on X axis and read counts
-    #'   on Y axis for each sample.
-    plot = function(d, min_count = 10) {
-      assertthat::assert_that(min_count >= 0)
-      d <- d |>
-        dplyr::filter(.data$count >= min_count)
+  d <- d |>
+    dplyr::mutate(
+      panel = dplyr::if_else(.data$chrom %in% main_chrom, "main", "alt"),
+      panel = factor(.data$panel, levels = c("main", "alt"))
+    ) |>
+    dplyr::select("chrom", "coverage", "panel")
+
+  main_panel <- d |> dplyr::filter(.data$panel == "main")
+  alt_panel <- d |> dplyr::filter(.data$panel == "alt")
+  top_alt <- alt_panel |>
+    dplyr::top_n(top_alt_n, wt = .data$coverage) |>
+    dplyr::arrange(dplyr::desc(.data$coverage)) |>
+    dplyr::pull(.data$chrom) |>
+    unique()
+
+  alt_panel2 <- alt_panel |>
+    dplyr::mutate(alt_group = dplyr::if_else(.data$chrom %in% top_alt, "top", "bottom"))
+
+  alt_panel_final <- alt_panel2 |>
+    dplyr::group_by(.data$alt_group) |>
+    dplyr::summarise(mean_cov = mean(.data$coverage)) |>
+    dplyr::inner_join(alt_panel2, by = c("alt_group")) |>
+    dplyr::mutate(
+      chrom = dplyr::if_else(.data$alt_group == "bottom", "OTHER", .data$chrom),
+      coverage = dplyr::if_else(.data$alt_group == "bottom", .data$mean_cov, .data$coverage)
+    ) |>
+    dplyr::distinct() |>
+    dplyr::filter(.data$coverage > min_cvg) |>
+    dplyr::ungroup() |>
+    dplyr::select("chrom", "coverage", "panel")
 
-      d |>
-        ggplot2::ggplot(ggplot2::aes(x = .data$fragmentLength, y = .data$count, colour = sample)) +
-        ggplot2::geom_line() +
-        ggplot2::labs(title = "Fragment Length Distribution") +
-        ggplot2::xlab("Fragment Length (bp)") +
-        ggplot2::ylab(glue("Read Count (min: {min_count})")) +
-        ggplot2::theme_minimal() +
-        ggplot2::theme(
-          legend.position = c(0.9, 0.9),
-          legend.justification = c(1, 1),
-          panel.grid.minor = ggplot2::element_blank(),
-          panel.grid.major = ggplot2::element_blank(),
-          plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold")
-        )
-    }
-  )
-)
+  chrom_fac_levels <- c(main_chrom, "chrM", "MT", top_alt[!top_alt %in% c("chrM", "MT")], "OTHER")
+  d <- dplyr::bind_rows(main_panel, alt_panel_final) |>
+    dplyr::mutate(chrom = factor(.data$chrom, levels = chrom_fac_levels))
 
-#' MappingMetricsFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `mapping_metrics.csv` file output from DRAGEN.
-#' This file contains mapping and aligning metrics, like the metrics computed by
-#' the Samtools Flagstat command. These metrics are available on an aggregate
-#' level (over all input data), and on a per read group level. NOTE: we are
-#' keeping only the read group level metrics (i.e. removing the aggregate data).
-#' Unless explicitly stated, the metrics units are in reads (i.e., not in
-#' terms of pairs or alignments).
-#'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.mapping_metrics.csv.gz", package = "dracarys")
-#' mm <- MappingMetricsFile$new(x)
-#' d <- mm$read() # or read(mm)
-#' mm$write(d, out_dir = tempdir(), prefix = "seqc_mm", out_format = "tsv")
-#'
-#' @export
-MappingMetricsFile <- R6::R6Class(
-  "MappingMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `mapping_metrics.csv` file output from DRAGEN.
-    #'
-    #' @return tibble with one row of X metrics per read group.
-    read = function() {
-      abbrev_nm <- c(
-        "Total input reads" = "reads_tot_input",
-        "Number of duplicate marked reads" = "reads_num_dupmarked",
-        "Number of unique reads (excl. duplicate marked reads)" = "reads_num_uniq",
-        "Reads with mate sequenced" = "reads_w_mate_seq",
-        "Reads without mate sequenced" = "reads_wo_mate_seq",
-        "QC-failed reads" = "reads_qcfail",
-        "Mapped reads adjusted for excluded mapping" = "reads_mapped_adjexcl",
-        "Mapped reads adjusted for filtered and excluded mapping" = "reads_mapped_adjfiltexcl",
-        "Unmapped reads adjusted for excluded mapping" = "reads_unmapped_adjexcl",
-        "Unmapped reads adjusted for filtered and excluded mapping" = "reads_unmapped_adjfiltexcl",
-        "Reads mapping to multiple locations" = "reads_map_multiloc",
-        "Hard-clipped bases R1" = "bases_hardclip_r1",
-        "Hard-clipped bases R2" = "bases_hardclip_r2",
-        "Soft-clipped bases" = "bases_softclip",
-        "Hard-clipped bases" = "bases_hardclip",
-        "Mapped reads" = "reads_mapped",
-        "Mapped reads adjusted for filtered mapping" = "reads_mapped_adjfilt",
-        "Mapped reads R1" = "reads_mapped_r1",
-        "Mapped reads R2" = "reads_mapped_r2",
-        "Number of unique & mapped reads (excl. duplicate marked reads)" = "reads_num_uniq_mapped",
-        "Unmapped reads" = "reads_unmapped",
-        "Unmapped reads adjusted for filtered mapping" = "reads_unmapped_adjfilt",
-        "Adjustment of reads matching non-reference decoys" = "reads_match_nonref_decoys_adj",
-        "Adjustment of reads matching filter contigs" = "reads_match_filt_contig_adj",
-        "Singleton reads (itself mapped; mate unmapped)" = "reads_singleton",
-        "Paired reads (itself & mate mapped)" = "reads_paired",
-        "Properly paired reads" = "reads_paired_proper",
-        "Not properly paired reads (discordant)" = "reads_discordant",
-        "Paired reads mapped to different chromosomes" = "reads_paired_mapped_diff_chrom",
-        "Paired reads mapped to different chromosomes (MAPQ>=10)" = "reads_paired_mapped_diff_chrom_mapq10",
-        "Reads with MAPQ [40:inf)" = "reads_mapq_40_inf",
-        "Reads with MAPQ [30:40)" = "reads_mapq_30_40",
-        "Reads with MAPQ [20:30)" = "reads_mapq_20_30",
-        "Reads with MAPQ [10:20)" = "reads_mapq_10_20",
-        "Reads with MAPQ [ 0:10)" = "reads_mapq_0_10",
-        "Reads with MAPQ NA (Unmapped reads)" = "reads_mapq_na_unmapped",
-        "Reads with indel R1" = "reads_indel_r1",
-        "Reads with indel R2" = "reads_indel_r2",
-        "Reads with splice junction" = "reads_splicejunc",
-        "Total bases" = "bases_tot",
-        "Total bases R1" = "bases_tot_r1",
-        "Total bases R2" = "bases_tot_r2",
-        "Mapped bases" = "bases_mapped",
-        "Mapped bases R1" = "bases_mapped_r1",
-        "Mapped bases R2" = "bases_mapped_r2",
-        "Soft-clipped bases R1" = "bases_softclip_r1",
-        "Soft-clipped bases R2" = "bases_softclip_r2",
-        "Mismatched bases R1" = "bases_mismatched_r1",
-        "Mismatched bases R2" = "bases_mismatched_r2",
-        "Mismatched bases R1 (excl. indels)" = "bases_mismatched_r1_noindels",
-        "Mismatched bases R2 (excl. indels)" = "bases_mismatched_r2_noindels",
-        "Q30 bases" = "bases_q30",
-        "Q30 bases R1" = "bases_q30_r1",
-        "Q30 bases R2" = "bases_q30_r2",
-        "Q30 bases (excl. dups & clipped bases)" = "bases_q30_nodups_noclipped",
-        "Total alignments" = "alignments_tot",
-        "Secondary alignments" = "alignments_secondary",
-        "Supplementary (chimeric) alignments" = "alignments_chimeric",
-        "Estimated read length" = "read_len",
-        "Bases in reference genome" = "bases_in_ref_genome",
-        "Bases in target bed [% of genome]" = "bases_in_target_bed_genome_pct",
-        "Average sequenced coverage over genome" = "cov_avg_seq_over_genome",
-        "Insert length: mean" = "insert_len_mean",
-        "Insert length: median" = "insert_len_median",
-        "Insert length: standard deviation" = "insert_len_std_dev",
-        "Provided sex chromosome ploidy" = "ploidy_sex_chrom_provided",
-        "Estimated sample contamination" = "contamination_est",
-        "DRAGEN mapping rate [mil. reads/second]" = "mapping_rate_dragen_milreads_per_sec",
-        "Number of duplicate marked and mate reads removed" = "reads_num_dupmarked_mate_reads_removed",
-        "Total reads in RG" = "reads_tot_rg",
-        "Filtered rRNA reads" = "reads_rrna_filtered"
+  d |>
+    dplyr::mutate(label = "sampleA") |>
+    ggplot2::ggplot(
+      ggplot2::aes(
+        x = .data$chrom, y = .data$coverage, group = .data$label,
       )
-      x <- self$path
-      raw <- readr::read_lines(x)
-      assertthat::assert_that(grepl("MAPPING/ALIGNING", raw[1]))
-      # tidy
-      d <- raw |>
-        tibble::as_tibble_col(column_name = "value") |>
-        tidyr::separate_wider_delim(
-          "value",
-          names = c("category", "RG", "var", "count", "pct"),
-          delim = ",", too_few = "align_start"
-        ) |>
-        dplyr::filter(.data$RG != "") |>
-        dplyr::mutate(
-          count = dplyr::na_if(.data$count, "NA"),
-          count = as.numeric(.data$count),
-          pct = as.numeric(.data$pct),
-          var = dplyr::recode(.data$var, !!!abbrev_nm)
-        ) |>
-        dplyr::select("RG", "var", "count", "pct")
-      # pivot
-      d |>
-        tidyr::pivot_longer(c("count", "pct")) |>
-        dplyr::mutate(
-          name = dplyr::if_else(.data$name == "count", "", "_pct"),
-          var = glue("{.data$var}{.data$name}")
-        ) |>
-        dplyr::select("RG", "var", "value") |>
-        dplyr::filter(!is.na(.data$value)) |>
-        tidyr::pivot_wider(names_from = "var", values_from = "value")
-    },
-    #' @description
-    #' Writes a tidy version of the `mapping_metrics.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
+    ) +
+    ggplot2::geom_point() +
+    ggplot2::geom_line() +
+    ggplot2::scale_y_continuous(
+      limits = c(0, NA), expand = c(0, 0), labels = scales::comma,
+      breaks = scales::pretty_breaks(n = 8)
+    ) +
+    ggplot2::theme_minimal() +
+    ggplot2::labs(title = "Mean Coverage Per Chromosome", colour = "Label") +
+    ggplot2::xlab("Chromosome") +
+    ggplot2::ylab("Coverage") +
+    ggplot2::theme(
+      legend.position = "top",
+      panel.grid.minor = ggplot2::element_blank(),
+      panel.grid.major.y = ggplot2::element_blank(),
+      strip.background = ggplot2::element_blank(),
+      strip.text.x = ggplot2::element_blank(),
+      axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1, size = 6),
+      plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold"),
+      panel.spacing = ggplot2::unit(2, "lines")
+    ) +
+    ggplot2::facet_wrap(ggplot2::vars(.data$panel), nrow = 2, scales = "free")
+}
 
 #' PloidyEstimationMetricsFile R6 Class
 #'
@@ -1267,460 +806,6 @@ PloidyEstimationMetricsFile <- R6::R6Class(
   )
 )
 
-#' ReplayFile R6 Class
-#'
-#' @description
-#' Contains methods for reading contents of
-#' the `replay.json` file output from DRAGEN, which contains the DRAGEN command
-#' line, parameters and version for the specific run.
-#'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II-replay.json.gz", package = "dracarys")
-#' r <- ReplayFile$new(x)
-#' d <- r$read() # or read(r)
-#' r$write(d, out_dir = tempdir(), prefix = "seqc_replay", out_format = "tsv")
-#' @export
-ReplayFile <- R6::R6Class(
-  "ReplayFile",
-  inherit = File,
-  public = list(
-    #' @description Reads the `replay.json` file.
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      x <- self$path
-      res <- x |>
-        jsonlite::read_json(simplifyVector = TRUE) |>
-        purrr::map_if(is.data.frame, tibble::as_tibble)
-
-      req_elements <- c("command_line", "hash_table_build", "dragen_config", "system")
-      assertthat::assert_that(all(names(res) %in% req_elements))
-      res[["system"]] <- res[["system"]] |>
-        tibble::as_tibble_row()
-      res[["hash_table_build"]] <- res[["hash_table_build"]] |>
-        tibble::as_tibble_row()
-      # we don't care if the columns are characters, no analysis likely to be done on dragen options
-      # (though never say never!)
-      res[["dragen_config"]] <- res[["dragen_config"]] |>
-        tidyr::pivot_wider(names_from = "name", values_from = "value")
-
-      dplyr::bind_cols(res)
-    },
-    #' @description
-    #' Writes a tidy version of the `replay.json` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
-#' TimeMetricsFile R6 Class
-#'
-#' @description
-#' Contains methods for reading contents of
-#' the `time_metrics.csv` file output from DRAGEN, which contains
-#' a breakdown of the run duration for each DRAGEN process.
-#'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
-#' tm <- TimeMetricsFile$new(x)
-#' d <- tm$read() # or read(tm)
-#' tm$write(d, out_dir = tempdir(), prefix = "seqc_time", out_format = "tsv")
-#' @export
-TimeMetricsFile <- R6::R6Class(
-  "TimeMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description Reads the `time_metrics.csv` file.
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      x <- self$path
-      cn <- c("dummy1", "dummy2", "Step", "time_hrs", "time_sec")
-      ct <- readr::cols(.default = "c", time_hrs = readr::col_time(format = "%T"), time_sec = "d")
-      d <- readr::read_csv(x, col_names = cn, col_types = ct)
-      assertthat::assert_that(d$dummy1[1] == "RUN TIME", is.na(d$dummy2[1]))
-      assertthat::assert_that(inherits(d$time_hrs, "hms"))
-      d |>
-        dplyr::mutate(
-          Step = tools::toTitleCase(sub("Time ", "", .data$Step)),
-          Time = substr(.data$time_hrs, 1, 5)
-        ) |>
-        dplyr::select("Step", "Time") |>
-        tidyr::pivot_wider(names_from = "Step", values_from = "Time") |>
-        dplyr::relocate("Total Runtime")
-    },
-    #' @description
-    #' Writes a tidy version of the `time_metrics.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
-#' Process Multiple TimeMetricsFile Objects
-#'
-#' Processes multiple TimeMetricsFile objects.
-#'
-#' @param x Atomic vector with one or more TimeMetricsFile objects.
-#' @param id ID for each input, which is used to disambiguate files
-#' generated from same samples. Default: index from 1 to length of `x`.
-#' @return tibble with the following columns:
-#'   - Step: DRAGEN step
-#'   - Time: time in HH:MM
-#'
-#' @examples
-#' p <- system.file("extdata/wgs/SEQC-II.time_metrics.csv.gz", package = "dracarys")
-#' x <- TimeMetricsFile$new(p)
-#' (tm <- time_metrics_process(c(x, x), id = c("run1", "run2")))
-#'
-#' @testexamples
-#' expect_equal(nrow(tm), 2)
-#'
-#' @export
-time_metrics_process <- function(x, id = seq_len(length(x))) {
-  assertthat::assert_that(all(purrr::map_lgl(x, ~ inherits(.x, "TimeMetricsFile"))))
-  x |>
-    purrr::map(read) |>
-    purrr::set_names(id) |>
-    dplyr::bind_rows(.id = "ID")
-}
-
-#' VCMetricsFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `vc_metrics.csv` file output from DRAGEN, which contains variant calling metrics.
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.vc_metrics.csv.gz", package = "dracarys")
-#' vm <- VCMetricsFile$new(x)
-#' d <- vm$read() # or read(vm)
-#' vm$write(d, out_dir = tempdir(), prefix = "seqc_vc", out_format = "tsv")
-#'
-#' @export
-VCMetricsFile <- R6::R6Class(
-  "VCMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `vc_metrics.csv` file output from DRAGEN.
-    #'
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      abbrev_nm <- c(
-        "Total" = "var_tot",
-        "Biallelic" = "var_biallelic",
-        "Multiallelic" = "var_multiallelic",
-        "SNPs" = "var_snp",
-        "Insertions (Hom)" = "var_ins_hom",
-        "Insertions (Het)" = "var_ins_het",
-        "Deletions (Hom)" = "var_del_hom",
-        "Deletions (Het)" = "var_del_het",
-        "Indels (Het)" = "var_indel_het",
-        "Chr X number of SNPs over genome" = "var_snp_x_over_genome",
-        "Chr Y number of SNPs over genome" = "var_snp_y_over_genome",
-        "(Chr X SNPs)/(chr Y SNPs) ratio over genome" = "var_x_over_y_snp_ratio_over_genome",
-        "SNP Transitions" = "var_snp_transitions",
-        "SNP Transversions" = "var_snp_transversions",
-        "Ti/Tv ratio" = "var_ti_tv_ratio",
-        "Heterozygous" = "var_heterozygous",
-        "Homozygous" = "var_homozygous",
-        "Het/Hom ratio" = "var_het_hom_ratio",
-        "In dbSNP" = "var_in_dbsnp",
-        "Not in dbSNP" = "var_nin_dbsnp",
-        "Percent Callability" = "callability_pct",
-        "Percent Autosome Callability" = "callability_auto_pct",
-        "Number of samples" = "sample_num",
-        "Reads Processed" = "reads_processed",
-        "Child Sample" = "sample_child"
-      )
-      x <- self$path
-      raw <- readr::read_lines(x)
-      assertthat::assert_that(grepl("VARIANT CALLER", raw[1]))
-      # tidy
-      d <- raw |>
-        tibble::as_tibble_col(column_name = "value") |>
-        tidyr::separate_wider_delim(
-          "value",
-          names = c("category", "sample", "var", "count", "pct"),
-          delim = ",", too_few = "align_start"
-        ) |>
-        dplyr::mutate(
-          var = dplyr::recode(.data$var, !!!abbrev_nm),
-          count = dplyr::na_if(.data$count, "NA"),
-          count = as.numeric(.data$count),
-          pct = round(as.numeric(.data$pct), 2),
-          category = dplyr::case_when(
-            grepl("SUMMARY", .data$category) ~ "summary",
-            grepl("PREFILTER", .data$category) ~ "prefilter",
-            grepl("POSTFILTER", .data$category) ~ "postfilter",
-            TRUE ~ "unknown"
-          )
-        ) |>
-        dplyr::filter(.data$category != "summary") |>
-        dplyr::select("category", "sample", "var", "count", "pct")
-      # pivot
-      d |>
-        tidyr::pivot_longer(c("count", "pct")) |>
-        dplyr::mutate(
-          name = dplyr::if_else(.data$name == "count", "", "_pct"),
-          var = glue("{.data$var}{.data$name}")
-        ) |>
-        dplyr::select("category", "sample", "var", "value") |>
-        dplyr::filter(!is.na(.data$value)) |>
-        tidyr::pivot_wider(names_from = "var", values_from = "value")
-    },
-    #' @description
-    #' Writes a tidy version of the `vc_metrics.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
-#' TrimmerMetricsFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `trimmer_metrics.csv` file output from DRAGEN
-#'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.trimmer_metrics.csv.gz", package = "dracarys")
-#' tm <- TrimmerMetricsFile$new(x)
-#' d <- tm$read()
-#' tm$write(d, out_dir = tempdir(), prefix = "seqc_tm", out_format = "tsv")
-#'
-#' @export
-TrimmerMetricsFile <- R6::R6Class(
-  "TrimmerMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `trimmer_metrics.csv` file output from DRAGEN.
-    #'
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      x <- self$path
-      d <- readr::read_lines(x)
-      assertthat::assert_that(grepl("TRIMMER STATISTICS", d[1]))
-      abbrev_nm <- c(
-        "Total input reads"                              = "reads_tot_input",
-        "Total input bases"                              = "bases_tot",
-        "Total input bases R1"                           = "bases_r1",
-        "Total input bases R2"                           = "bases_r2",
-        "Average input read length"                      = "read_len_avg",
-        "Total trimmed reads"                            = "reads_trimmed_tot",
-        "Total trimmed bases"                            = "bases_trimmed_tot",
-        "Average bases trimmed per read"                 = "bases_trimmed_avg_per_read",
-        "Average bases trimmed per trimmed read"         = "bases_trimmed_avg_per_trimmedread",
-        "Remaining poly-G K-mers R1 3prime"              = "polygkmers3r1_remaining",
-        "Remaining poly-G K-mers R2 3prime"              = "polygkmers3r2_remaining",
-        "Poly-G soft trimmed reads unfiltered R1 3prime" = "polyg_soft_trimmed_reads_unfilt_3r1",
-        "Poly-G soft trimmed reads unfiltered R2 3prime" = "polyg_soft_trimmed_reads_unfilt_3r2",
-        "Poly-G soft trimmed reads filtered R1 3prime"   = "polyg_soft_trimmed_reads_filt_3r1",
-        "Poly-G soft trimmed reads filtered R2 3prime"   = "polyg_soft_trimmed_reads_filt_3r2",
-        "Poly-G soft trimmed bases unfiltered R1 3prime" = "polyg_soft_trimmed_bases_unfilt_3r1",
-        "Poly-G soft trimmed bases unfiltered R2 3prime" = "polyg_soft_trimmed_bases_unfilt_3r2",
-        "Poly-G soft trimmed bases filtered R1 3prime"   = "polyg_soft_trimmed_bases_filt_3r1",
-        "Poly-G soft trimmed bases filtered R2 3prime"   = "polyg_soft_trimmed_bases_filt_3r2",
-        "Total filtered reads"                           = "reads_tot_filt",
-        "Reads filtered for minimum read length R1"      = "reads_filt_minreadlenr1",
-        "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2"
-      )
-
-      d |>
-        tibble::as_tibble_col(column_name = "value") |>
-        tidyr::separate_wider_delim("value", names = c("category", "extra", "var", "count", "pct"), delim = ",", too_few = "align_start") |>
-        dplyr::mutate(
-          count = as.numeric(.data$count),
-          pct = round(as.numeric(.data$pct), 2),
-          var = dplyr::recode(.data$var, !!!abbrev_nm)
-        ) |>
-        dplyr::select("var", "count", "pct") |>
-        tidyr::pivot_longer(c("count", "pct")) |>
-        dplyr::filter(!is.na(.data$value)) |>
-        dplyr::mutate(
-          name = dplyr::if_else(.data$name == "count", "", "_pct"),
-          var = glue("{.data$var}{.data$name}")
-        ) |>
-        dplyr::select("var", "value") |>
-        tidyr::pivot_wider(names_from = "var", values_from = "value")
-    },
-    #' @description
-    #' Writes a tidy version of the `trimmer_metrics.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
-#' SvMetricsFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `sv_metrics.csv` file output from DRAGEN
-#'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.sv_metrics.csv.gz", package = "dracarys")
-#' sv <- SvMetricsFile$new(x)
-#' d <- sv$read()
-#' sv$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv")
-#'
-#' @export
-SvMetricsFile <- R6::R6Class(
-  "SvMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `sv_metrics.csv` file output from DRAGEN.
-    #'
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      x <- self$path
-      d <- readr::read_lines(x)
-      assertthat::assert_that(grepl("SV SUMMARY", d[1]))
-      abbrev_nm <- c(
-        "Number of deletions (PASS)" = "n_del",
-        "Number of insertions (PASS)" = "n_ins",
-        "Number of duplications (PASS)" = "n_dup",
-        "Number of breakend pairs (PASS)" = "n_bnd"
-      )
-      d |>
-        tibble::as_tibble_col(column_name = "value") |>
-        dplyr::filter(!grepl("Total number of structural variants", .data$value)) |>
-        tidyr::separate_wider_delim(
-          "value",
-          names = c("svsum", "sample", "var", "count", "pct"), delim = ",",
-          too_few = "align_start"
-        ) |>
-        dplyr::mutate(
-          count = as.numeric(.data$count),
-          pct = round(as.numeric(.data$pct), 2),
-          var = dplyr::recode(.data$var, !!!abbrev_nm)
-        ) |>
-        dplyr::select("var", "count", "pct") |>
-        tidyr::pivot_longer(c("count", "pct")) |>
-        dplyr::mutate(
-          name = dplyr::if_else(.data$name == "count", "", "_pct"),
-          var = glue("{.data$var}{.data$name}")
-        ) |>
-        dplyr::arrange(.data$name, .data$var) |>
-        dplyr::select("var", "value") |>
-        tidyr::pivot_wider(names_from = "var", values_from = "value")
-    },
-    #' @description
-    #' Writes a tidy version of the `sv_metrics.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
-#' WgsHistFile R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `wgs_hist.csv` file output from DRAGEN
-#'
-#' @examples
-#' x <- system.file("extdata/wgs/SEQC-II.wgs_hist.csv.gz", package = "dracarys")
-#' h <- WgsHistFile$new(x)
-#' d <- h$read()
-#' h$write(d, out_dir = tempdir(), prefix = "seqc_sv", out_format = "tsv")
-#'
-#' @export
-WgsHistFile <- R6::R6Class(
-  "WgsHistFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `wgs_hist.csv` file output from DRAGEN.
-    #'
-    #' @return tibble with one row and metrics spread across individual columns.
-    read = function() {
-      x <- self$path
-      d <- readr::read_csv(x, col_names = c("var", "pct"), col_types = "cd")
-      d |>
-        dplyr::mutate(
-          var = sub("PCT of bases in wgs with coverage ", "", .data$var),
-          var = gsub("\\[|\\]|\\(|\\)", "", .data$var),
-          var = gsub("x", "", .data$var),
-          var = gsub("inf", "Inf", .data$var)
-        ) |>
-        tidyr::separate_wider_delim("var", names = c("start", "end"), delim = ":") |>
-        dplyr::mutate(
-          start = as.numeric(.data$start),
-          end = as.numeric(.data$end),
-          pct = round(.data$pct, 2),
-          cumsum = cumsum(.data$pct)
-        )
-    },
-    #' @description
-    #' Writes a tidy version of the `wgs_hist.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
-    }
-  )
-)
-
 dragen_subprefix <- function(x, suffix) {
   # L2401290.exon_contig_mean_cov.csv -> exon
   # L2401290.target_bed_contig_mean_cov.csv -> target_bed
diff --git a/R/dragen_fastqc.R b/R/dragen_fastqc.R
index 4390037..0ac31bc 100644
--- a/R/dragen_fastqc.R
+++ b/R/dragen_fastqc.R
@@ -1,51 +1,3 @@
-#' FastqcMetrics R6 Class
-#'
-#' @description
-#' Contains methods for reading and displaying contents of
-#' the `fastqc_metrics.csv` file output from DRAGEN.
-#'
-#' @export
-FastqcMetricsFile <- R6::R6Class(
-  "FastqcMetricsFile",
-  inherit = File,
-  public = list(
-    #' @description
-    #' Reads the `fastqc_metrics.csv` file output from DRAGEN.
-    #'
-    #' @return tibble. TODO.
-    read = function() {
-      x <- self$path
-      res <- dragen_fastqc_metrics_read(x)
-    },
-    #' @description
-    #' Writes a tidy version of the `fastqc_metrics.csv` file output
-    #' from DRAGEN.
-    #'
-    #' @param d Parsed object from `self$read()`.
-    #' @param prefix Prefix of output file(s).
-    #' @param out_dir Output directory.
-    #' @param out_format Format of output file(s).
-    #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
-    write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
-      if (!is.null(out_dir)) {
-        prefix <- file.path(out_dir, prefix)
-      }
-      d_write <- d |>
-        tibble::enframe(name = "section") |>
-        dplyr::rowwise() |>
-        dplyr::mutate(
-          section_low = tolower(.data$section),
-          p = glue("{prefix}_{.data$section_low}"),
-          out = list(write_dracarys(obj = .data$value, prefix = .data$p, out_format = out_format, drid = drid))
-        ) |>
-        dplyr::ungroup() |>
-        dplyr::select("section", "value") |>
-        tibble::deframe()
-      invisible(d_write)
-    }
-  )
-)
-
 #' DRAGEN FASTQC Metrics
 #'
 #' Read DRAGEN `fastqc_metrics.csv` file.
diff --git a/R/tso.R b/R/tso.R
index 7596449..6df0cde 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -325,34 +325,6 @@ tso_tmbt_read <- function(x) {
   d[]
 }
 
-#' Plot Fragment Length Hist
-#'
-#' Plots the fragment length distributions as given in the
-#' `fragment_length_hist` file.
-#'
-#' @param d Parsed tibble.
-#' @param min_count Minimum read count to be plotted (def: 10).
-#'
-#' @return A ggplot2 plot containing fragment lengths on X axis and read counts
-#' on Y axis for each sample.
-tso_fraglenhist_plot <- function(d, min_count = 10) {
-  assertthat::assert_that(is.numeric(min_count), min_count >= 0)
-  d |>
-    dplyr::filter(.data$Count >= min_count) |>
-    ggplot2::ggplot(ggplot2::aes(x = .data$FragmentLength, y = .data$Count)) +
-    ggplot2::geom_line() +
-    ggplot2::labs(title = "Fragment Length Distribution") +
-    ggplot2::xlab("Fragment Length (bp)") +
-    ggplot2::ylab(glue("Read Count (min: {min_count})")) +
-    ggplot2::theme_minimal() +
-    ggplot2::theme(
-      legend.position = c(0.9, 0.9),
-      legend.justification = c(1, 1),
-      panel.grid.minor = ggplot2::element_blank(),
-      panel.grid.major = ggplot2::element_blank(),
-      plot.title = ggplot2::element_text(colour = "#2c3e50", size = 14, face = "bold")
-    )
-}
 
 #' Read TSO TargetRegionCoverage File
 #'

From 9c32347f478220f2ae220c23f03e43f2197a5eb9 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 21:30:15 +1100
Subject: [PATCH 15/17] dragen: add dirty name checker

---
 R/dragen.R                          | 89 ++++++++++++++++++++++-------
 R/dragen_fastqc.R                   | 18 +++---
 R/tso_dragen.R                      |  2 +-
 R/utils.R                           | 15 +++++
 man/dragen_cnv_metrics_read.Rd      |  6 ++
 man/dragen_coverage_metrics_read.Rd |  5 ++
 man/dragen_gc_metrics_read.Rd       |  6 ++
 man/dragen_mapping_metrics_read.Rd  |  5 ++
 man/dragen_sv_metrics_read.Rd       |  5 ++
 man/dragen_trimmer_metrics_read.Rd  |  5 ++
 man/dragen_umi_metrics_read.Rd      |  5 ++
 man/dragen_vc_metrics_read.Rd       |  5 ++
 12 files changed, 135 insertions(+), 31 deletions(-)

diff --git a/R/dragen.R b/R/dragen.R
index 90cfd8b..2e6cbbd 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -34,6 +34,10 @@ dragen_fraglenhist_plot <- function(d, min_count = 10) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_umi_metrics_read(x)
+#' }
 #' @export
 dragen_umi_metrics_read <- function(x) {
   d0 <- readr::read_lines(x)
@@ -85,6 +89,7 @@ dragen_umi_metrics_read <- function(x) {
       var = tolower(.data$var),
       var = dplyr::recode(.data$var, !!!abbrev_nm)
     )
+  dirty_names_cleaned(d1$var, abbrev_nm, x)
   hist <- d1 |>
     dplyr::filter(grepl("histo", .data$var)) |>
     dplyr::select(name = "var", "count") |>
@@ -121,6 +126,11 @@ dragen_umi_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_gc_metrics_read(x)
+#' }
+#'
 #' @export
 dragen_gc_metrics_read <- function(x) {
   d0 <- readr::read_lines(x)
@@ -155,6 +165,7 @@ dragen_gc_metrics_read <- function(x) {
       name = dplyr::recode(.data$name, !!!abbrev_nm)
     ) |>
     tidyr::pivot_wider(names_from = "name", values_from = "value")
+  dirty_names_cleaned(colnames(summary), abbrev_nm, x)
 
   # GC BIAS DETAILS
   details <- d1 |>
@@ -190,6 +201,11 @@ dragen_gc_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_cnv_metrics_read(x)
+#' }
+#'
 #' @export
 dragen_cnv_metrics_read <- function(x) {
   d0 <- readr::read_lines(x)
@@ -230,7 +246,9 @@ dragen_cnv_metrics_read <- function(x) {
       count = as.numeric(.data$count),
       pct = round(as.numeric(.data$pct), 2),
       var = dplyr::recode(.data$var, !!!abbrev_nm)
-    ) |>
+    )
+  dirty_names_cleaned(d2$var, abbrev_nm, x)
+  d2 <- d2 |>
     dplyr::select("var", "count", "pct") |>
     tidyr::pivot_longer(c("count", "pct")) |>
     dplyr::filter(!is.na(.data$value)) |>
@@ -251,6 +269,10 @@ dragen_cnv_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_sv_metrics_read(x)
+#' }
 #' @export
 dragen_sv_metrics_read <- function(x) {
   d <- readr::read_lines(x)
@@ -261,7 +283,7 @@ dragen_sv_metrics_read <- function(x) {
     "Number of duplications (PASS)" = "dup",
     "Number of breakend pairs (PASS)" = "bnd"
   )
-  d |>
+  res <- d |>
     tibble::as_tibble_col(column_name = "value") |>
     dplyr::filter(!grepl("Total number of structural variants", .data$value)) |>
     tidyr::separate_wider_delim(
@@ -275,6 +297,8 @@ dragen_sv_metrics_read <- function(x) {
     ) |>
     dplyr::select("var", "count") |>
     tidyr::pivot_wider(names_from = "var", values_from = "count")
+  dirty_names_cleaned(colnames(res), abbrev_nm, x)
+  res
 }
 
 #' Read DRAGEN Trimmer Metrics
@@ -284,10 +308,14 @@ dragen_sv_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_trimmer_metrics_read(x)
+#' }
 #' @export
 dragen_trimmer_metrics_read <- function(x) {
-  d <- readr::read_lines(x)
-  assertthat::assert_that(grepl("TRIMMER STATISTICS", d[1]))
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("TRIMMER STATISTICS", d0[1]))
   abbrev_nm <- c(
     "Total input reads"                              = "reads_tot_input",
     "Total input bases"                              = "bases_tot",
@@ -313,7 +341,7 @@ dragen_trimmer_metrics_read <- function(x) {
     "Reads filtered for minimum read length R2"      = "reads_filt_minreadlenr2"
   )
 
-  d |>
+  d1 <- d0 |>
     tibble::as_tibble_col(column_name = "value") |>
     tidyr::separate_wider_delim("value", names = c("category", "extra", "var", "count", "pct"), delim = ",", too_few = "align_start") |>
     dplyr::mutate(
@@ -321,7 +349,9 @@ dragen_trimmer_metrics_read <- function(x) {
       pct = round(as.numeric(.data$pct), 2),
       var = dplyr::recode(.data$var, !!!abbrev_nm)
     ) |>
-    dplyr::select("var", "count", "pct") |>
+    dplyr::select("var", "count", "pct")
+  dirty_names_cleaned(d1$var, abbrev_nm, x)
+  d1 |>
     tidyr::pivot_longer(c("count", "pct")) |>
     dplyr::filter(!is.na(.data$value)) |>
     dplyr::mutate(
@@ -339,6 +369,10 @@ dragen_trimmer_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_vc_metrics_read(x)
+#' }
 #' @export
 dragen_vc_metrics_read <- function(x) {
   abbrev_nm1 <- tibble::tribble(
@@ -367,12 +401,14 @@ dragen_vc_metrics_read <- function(x) {
     "Percent Autosome Callability", "callability_auto_pct", FALSE,
     "Number of samples", "sample_num", FALSE,
     "Reads Processed", "reads_processed", FALSE,
-    "Child Sample", "sample_child", FALSE
+    "Child Sample", "sample_child", FALSE,
+    "Percent QC Region Callability in Region 1", "qc_region_callability_pct_region1", FALSE,
+    "Percent QC Region Callability in Region 2", "qc_region_callability_pct_region2", FALSE
   )
-  raw <- readr::read_lines(x)
-  assertthat::assert_that(grepl("VARIANT CALLER", raw[1]))
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("VARIANT CALLER", d0[1]))
   # first detect if this is genome or target region
-  res <- raw |>
+  d1 <- d0 |>
     tibble::as_tibble_col(column_name = "value") |>
     tidyr::separate_wider_delim(
       "value",
@@ -381,7 +417,7 @@ dragen_vc_metrics_read <- function(x) {
     )
   reg1 <- NULL
   str1 <- NULL
-  tmp <- res |>
+  tmp <- d1 |>
     dplyr::filter(grepl("Chr X number of SNPs over ", .data$var)) |>
     dplyr::slice_head(n = 1) |>
     dplyr::pull("var")
@@ -406,7 +442,7 @@ dragen_vc_metrics_read <- function(x) {
     dplyr::select("raw", "clean") |>
     tibble::deframe()
 
-  d <- res |>
+  d <- d1 |>
     dplyr::mutate(
       var = dplyr::recode(.data$var, !!!abbrev_nm),
       count = dplyr::na_if(.data$count, "NA"),
@@ -421,6 +457,7 @@ dragen_vc_metrics_read <- function(x) {
     ) |>
     dplyr::filter(.data$category != "summary") |>
     dplyr::select("category", "sample", "var", "count", "pct")
+  dirty_names_cleaned(d$var, abbrev_nm, x)
   # pivot
   d |>
     tidyr::pivot_longer(c("count", "pct")) |>
@@ -440,6 +477,10 @@ dragen_vc_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_mapping_metrics_read(x)
+#' }
 #' @export
 dragen_mapping_metrics_read <- function(x) {
   abbrev_nm <- c(
@@ -516,11 +557,11 @@ dragen_mapping_metrics_read <- function(x) {
     "Average sequenced coverage over genome" = "cov_avg_seq_over_genome",
     "Filtered rRNA reads" = "reads_rrna_filtered"
   )
-  raw <- readr::read_lines(x)
-  assertthat::assert_that(grepl("MAPPING/ALIGNING", raw[1]))
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("MAPPING/ALIGNING", d0[1]))
   # split by RG and non-RG
   # tidy
-  d <- raw |>
+  d <- d0 |>
     tibble::as_tibble_col(column_name = "value") |>
     tidyr::separate_wider_delim(
       "value",
@@ -535,6 +576,7 @@ dragen_mapping_metrics_read <- function(x) {
       RG = dplyr::if_else(.data$RG == "", "Total", .data$RG)
     ) |>
     dplyr::select("RG", "var", "count", "pct")
+  dirty_names_cleaned(unique(d$var), abbrev_nm, x)
   # pivot
   d |>
     tidyr::pivot_longer(c("count", "pct")) |>
@@ -553,6 +595,10 @@ dragen_mapping_metrics_read <- function(x) {
 #' @param x Path to file.
 #'
 #' @return Tibble with metrics.
+#' @examples
+#' \dontrun{
+#' dragen_coverage_metrics_read(x)
+#' }
 #' @export
 dragen_coverage_metrics_read <- function(x) {
   # all rows except 'Aligned bases' and 'Aligned reads' refer to the region
@@ -572,10 +618,10 @@ dragen_coverage_metrics_read <- function(x) {
     "Mean/Median autosomal coverage ratio over ", "cov_mean_median_auto_ratio_over_", TRUE,
     "Aligned reads in ", "reads_aligned_in_", TRUE
   )
-  raw <- readr::read_lines(x)
-  assertthat::assert_that(grepl("COVERAGE SUMMARY", raw[1]))
+  d0 <- readr::read_lines(x)
+  assertthat::assert_that(grepl("COVERAGE SUMMARY", d0[1]))
   # first detect if this is genome, QC coverage region, or target region
-  res <- raw |>
+  d1 <- d0 |>
     tibble::as_tibble_col(column_name = "value") |>
     tidyr::separate_wider_delim(
       "value",
@@ -584,7 +630,7 @@ dragen_coverage_metrics_read <- function(x) {
     )
   reg1 <- NULL
   str1 <- NULL
-  tmp <- res |>
+  tmp <- d1 |>
     dplyr::filter(grepl("PCT of .* with coverage ", .data$var)) |>
     dplyr::slice_head(n = 1) |>
     dplyr::pull("var")
@@ -611,12 +657,13 @@ dragen_coverage_metrics_read <- function(x) {
   # split to rename the
   # "PCT of genome with coverage [100x: inf)" values
   pat <- glue("PCT of {str1} with coverage ")
-  res1 <- res |>
+  res1 <- d1 |>
     # pct just shows % for a couple rows which can be
     # calculated from their above values
     dplyr::filter(!grepl(pat, .data$var)) |>
     dplyr::select("var", "value")
-  res2 <- res |>
+  dirty_names_cleaned(res1$var, names(abbrev_nm), x)
+  res2 <- d1 |>
     dplyr::filter(grepl(pat, .data$var)) |>
     dplyr::mutate(
       var = sub(pat, "", .data$var),
diff --git a/R/dragen_fastqc.R b/R/dragen_fastqc.R
index 0ac31bc..ceb9ce3 100644
--- a/R/dragen_fastqc.R
+++ b/R/dragen_fastqc.R
@@ -120,14 +120,14 @@ dragen_fastqc_metrics_read <- function(x) {
     dplyr::select("mate", "seq", "bp", "value")
 
   list(
-    positional_base_content = pos_base_cont,
-    positional_base_mean_quality = pos_base_mean_qual,
-    positional_quality = pos_qual,
-    read_gc_content = gc_cont,
-    read_gc_content_quality = gc_cont_qual,
-    read_lengths = read_len,
-    read_mean_quality = read_mean_qual,
-    sequence_positions = seq_pos
+    fqc_positionalBaseContent = pos_base_cont,
+    fqc_positionalBaseMeanQuality = pos_base_mean_qual,
+    fqc_positionalQuality = pos_qual,
+    fqc_readGCContent = gc_cont,
+    fqc_readGCContentQuality = gc_cont_qual,
+    fqc_readLengths = read_len,
+    fqc_readMeanQuality = read_mean_qual,
+    fqc_sequencePositions = seq_pos
   ) |>
-    tibble::enframe(name = "fastqc_name", value = "fastqc_value")
+    tibble::enframe(name = "name", value = "data")
 }
diff --git a/R/tso_dragen.R b/R/tso_dragen.R
index d5175a3..dc35091 100644
--- a/R/tso_dragen.R
+++ b/R/tso_dragen.R
@@ -241,7 +241,7 @@ Wf_dragen <- R6::R6Class(
     #' @param x Path to file.
     read_fastqcMetrics = function(x) {
       dat <- dragen_fastqc_metrics_read(x)
-      tibble::tibble(name = "fastqcmetrics", data = list(dat[]))
+      dat
     },
     #' @description Read `gc_metrics.csv` file.
     #' @param x Path to file.
diff --git a/R/utils.R b/R/utils.R
index 8691826..c2bc44c 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,3 +1,18 @@
+dirty_names_cleaned <- function(dirty, clean, fname = NULL) {
+  assertthat::assert_that(
+    rlang::is_atomic(dirty),
+    rlang::is_atomic(clean)
+  )
+  are_dirty <- dirty[!dirty %in% clean]
+  msg <- paste(
+    "Following columns are dirty:",
+    paste(are_dirty, collapse = ", "),
+    ifelse(!is.null(fname), paste("Filename:", fname), ""),
+    sep = "\n"
+  )
+  assertthat::assert_that(length(are_dirty) == 0, msg = msg)
+}
+
 #' Print current timestamp for logging
 #'
 #' @return Current timestamp as character.
diff --git a/man/dragen_cnv_metrics_read.Rd b/man/dragen_cnv_metrics_read.Rd
index c6e7281..c347067 100644
--- a/man/dragen_cnv_metrics_read.Rd
+++ b/man/dragen_cnv_metrics_read.Rd
@@ -15,3 +15,9 @@ Tibble with metrics.
 \description{
 Reads the \code{cnv_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_cnv_metrics_read(x)
+}
+
+}
diff --git a/man/dragen_coverage_metrics_read.Rd b/man/dragen_coverage_metrics_read.Rd
index 96fd3ef..dc88de2 100644
--- a/man/dragen_coverage_metrics_read.Rd
+++ b/man/dragen_coverage_metrics_read.Rd
@@ -15,3 +15,8 @@ Tibble with metrics.
 \description{
 Reads the \code{coverage_metrics.csv} file generated by DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_coverage_metrics_read(x)
+}
+}
diff --git a/man/dragen_gc_metrics_read.Rd b/man/dragen_gc_metrics_read.Rd
index fc53959..f68f4a0 100644
--- a/man/dragen_gc_metrics_read.Rd
+++ b/man/dragen_gc_metrics_read.Rd
@@ -15,3 +15,9 @@ Tibble with metrics.
 \description{
 Reads the \code{gc_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_gc_metrics_read(x)
+}
+
+}
diff --git a/man/dragen_mapping_metrics_read.Rd b/man/dragen_mapping_metrics_read.Rd
index 223f0a7..a2ff466 100644
--- a/man/dragen_mapping_metrics_read.Rd
+++ b/man/dragen_mapping_metrics_read.Rd
@@ -15,3 +15,8 @@ Tibble with metrics.
 \description{
 Reads the \code{mapping_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_mapping_metrics_read(x)
+}
+}
diff --git a/man/dragen_sv_metrics_read.Rd b/man/dragen_sv_metrics_read.Rd
index 50577e0..0150f5d 100644
--- a/man/dragen_sv_metrics_read.Rd
+++ b/man/dragen_sv_metrics_read.Rd
@@ -15,3 +15,8 @@ Tibble with metrics.
 \description{
 Reads the \code{sv_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_sv_metrics_read(x)
+}
+}
diff --git a/man/dragen_trimmer_metrics_read.Rd b/man/dragen_trimmer_metrics_read.Rd
index 806a86f..8a58398 100644
--- a/man/dragen_trimmer_metrics_read.Rd
+++ b/man/dragen_trimmer_metrics_read.Rd
@@ -15,3 +15,8 @@ Tibble with metrics.
 \description{
 Reads the \code{trimmer_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_trimmer_metrics_read(x)
+}
+}
diff --git a/man/dragen_umi_metrics_read.Rd b/man/dragen_umi_metrics_read.Rd
index ca80a82..16f57af 100644
--- a/man/dragen_umi_metrics_read.Rd
+++ b/man/dragen_umi_metrics_read.Rd
@@ -15,3 +15,8 @@ Tibble with metrics.
 \description{
 Reads the \code{umi_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_umi_metrics_read(x)
+}
+}
diff --git a/man/dragen_vc_metrics_read.Rd b/man/dragen_vc_metrics_read.Rd
index 9bd4dae..e0e904e 100644
--- a/man/dragen_vc_metrics_read.Rd
+++ b/man/dragen_vc_metrics_read.Rd
@@ -15,3 +15,8 @@ Tibble with metrics.
 \description{
 Reads the \code{vc_metrics.csv} file output from DRAGEN.
 }
+\examples{
+\dontrun{
+dragen_vc_metrics_read(x)
+}
+}

From 94a48578adc3d86c0e6b68a3435dcf83818feca4 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 23:56:58 +1100
Subject: [PATCH 16/17] Wf writer: output prefix

---
 R/Wf.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/Wf.R b/R/Wf.R
index 4f23fc6..9d35956 100644
--- a/R/Wf.R
+++ b/R/Wf.R
@@ -209,6 +209,7 @@ Wf <- R6::R6Class(
     #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
     write = function(x, outdir = NULL, prefix = NULL, format = "tsv", drid = NULL) {
       assertthat::assert_that(!is.null(prefix))
+      assertthat::assert_that(all(c("name", "data") %in% colnames(x)))
       if (!is.null(outdir)) {
         prefix <- file.path(outdir, prefix)
       }
@@ -219,7 +220,7 @@ Wf <- R6::R6Class(
           out = list(write_dracarys(obj = .data$data, prefix = .data$p, out_format = format, drid = drid))
         ) |>
         dplyr::ungroup() |>
-        dplyr::select("name", "data")
+        dplyr::select("name", "data", prefix = "p")
       invisible(d_write)
     }
   ) # end public

From d3af9e84a6c5a3d6ec944d91b37ddf99dfbc26b8 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 18 Oct 2024 23:58:04 +1100
Subject: [PATCH 17/17] tsov2: incorporate dragenCaller R6 object

---
 R/tsov2.R                             | 99 +++++++++++++--------------
 man/Wf_tso_ctdna_tumor_only_v2.Rd     | 20 ++++--
 man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd | 13 ++++
 3 files changed, 77 insertions(+), 55 deletions(-)

diff --git a/R/tsov2.R b/R/tsov2.R
index 8ec5175..1a08dd9 100644
--- a/R/tsov2.R
+++ b/R/tsov2.R
@@ -14,11 +14,21 @@
 #' prefix <- "L2401290"
 #' t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix)
 #' t1$list_files(max_files = 100)
+#' t1$dragenObj$list_files(max_files = 100)
 #' t1$list_files_filter_relevant(max_files = 300)
-#' d <- t1$download_files(max_files = 100, dryrun = F)
-#' d_tidy <- t1$tidy_files(d)
-#' d_write <- t1$write(
-#'   d_tidy,
+#' t1$dragenObj$list_files_filter_relevant(max_files = 300)
+#' d1 <- t1$download_files(max_files = 100, dryrun = F)
+#' d2 <- t1$dragenObj$download_files(max_files = 100, dryrun = F)
+#' d1_tidy <- t1$tidy_files(d1)
+#' d2_tidy <- t1$dragenObj$tidy_files(d2)
+#' d_write1 <- t1$write(
+#'   d1_tidy,
+#'   outdir = file.path(p, "dracarys_tidy"),
+#'   prefix = prefix,
+#'   format = "tsv"
+#' )
+#' d_write2 <- t1$dragenObj$write(
+#'   d2_tidy,
 #'   outdir = file.path(p, "dracarys_tidy"),
 #'   prefix = prefix,
 #'   format = "tsv"
@@ -54,6 +64,8 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class(
   public = list(
     #' @field prefix The LibraryID prefix of the tumor sample (needed for path lookup).
     prefix = NULL,
+    #' @field dragenObj dragen object.
+    dragenObj = NULL,
     #' @description Create a new Wf_tso_ctdna_tumor_only_v2 object.
     #' @param path Path to directory with raw workflow results (from S3 or
     #' local filesystem).
@@ -64,6 +76,7 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class(
       res <- glue("Results/{pref}")
       li <- "Logs_Intermediates"
       dc <- glue("{li}/DragenCaller/{pref}")
+      self$dragenObj <- Wf_dragen$new(path = file.path(path, dc), prefix = glue("{dc}/{prefix}"))
       # Results
       reg1 <- tibble::tribble(
         ~regex, ~fun,
@@ -73,7 +86,7 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class(
         glue("{res}/{pref}\\.gene_cov_report\\.tsv$"), "cvgrepg",
         glue("{res}/{pref}\\.hard-filtered\\.vcf\\.gz$"), "hardfilt",
         glue("{res}/{pref}\\.hard-filtered\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
-        glue("{res}/{pref}\\.microsat_output\\.json$"), "msi",
+        # glue("{res}/{pref}\\.microsat_output\\.json$"), "msi", # in DragenCaller
         glue("{res}/{pref}\\.tmb.trace\\.tsv$"), "tmbt",
         glue("{res}/{pref}_CombinedVariantOutput\\.tsv$"), "cvo",
         glue("{res}/{pref}_Fusions\\.csv$"), "fus",
@@ -81,50 +94,11 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class(
         # glue("{res}/{pref}_SmallVariants_Annotated\\.json\\.gz$"), "DOWNLOAD_ONLY",
         glue("{li}/SampleAnalysisResults/{pref}_SampleAnalysisResults\\.json$"), "sar"
       )
-      # DragenCaller
-      reg2 <- tibble::tribble(
-        ~regex, ~fun,
-        glue("{dc}/{pref}\\-replay\\.json$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.cnv_metrics.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.exon_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.fastqc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.fragment_length_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.gc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.gvcf_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.mapping_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.microsat_diffs\\.txt$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.microsat_output\\.json$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.sv_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.target_bed_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.time_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.tmb_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.trimmer_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.umi_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.vc_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_contig_mean_cov\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_coverage_metrics\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_fine_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_hist\\.csv$"), "DOWNLOAD_ONLY",
-        glue("{dc}/{pref}\\.wgs_overall_mean_cov\\.csv$"), "DOWNLOAD_ONLY"
-      )
-      regexes <- dplyr::bind_rows(reg1, reg2) |>
+      regexes <- reg1 |>
         dplyr::mutate(
           fun = paste0("read_", .data$fun),
           fun = ifelse(.data$fun == "read_DOWNLOAD_ONLY", "DOWNLOAD_ONLY", .data$fun)
         )
-
       super$initialize(path = path, wname = wname, regexes = regexes)
       self$prefix <- prefix
     },
@@ -225,6 +199,19 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class(
 #'
 #' @examples
 #' \dontrun{
+#' #---- Local ----#
+#' p <- file.path(
+#'   "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
+#'   "analysis/cttsov2/20240915ff0295ed"
+#' )
+#' prefix <- "L2401290"
+#' outdir <- file.path(p, "dracarys_tidy")
+#' d <- dtw_Wf_tso_ctdna_tumor_only_v2(
+#'   path = p, prefix = prefix, outdir = outdir,
+#'   format = "tsv",
+#'   dryrun = F
+#' )
+#'
 #' p <- file.path(
 #'   "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
 #'   "analysis/cttsov2/20240915ff0295ed"
@@ -244,18 +231,28 @@ dtw_Wf_tso_ctdna_tumor_only_v2 <- function(path, prefix, outdir,
                                            max_files = 1000,
                                            dryrun = FALSE) {
   obj <- Wf_tso_ctdna_tumor_only_v2$new(path = path, prefix = prefix)
-  d_dl <- obj$download_files(
+  d_dl1 <- obj$download_files(
+    outdir = outdir, max_files = max_files, dryrun = dryrun
+  )
+  d_dl2 <- obj$dragenObj$download_files(
     outdir = outdir, max_files = max_files, dryrun = dryrun
   )
   if (!dryrun) {
-    d_tidy <- obj$tidy_files(d_dl)
-    d_write <- obj$write(
-      d_tidy,
+    d_tidy1 <- obj$tidy_files(d_dl1)
+    d_tidy2 <- obj$dragenObj$tidy_files(d_dl2)
+    d_write1 <- obj$write(
+      d_tidy1,
+      outdir = outdir_tidy,
+      prefix = prefix,
+      format = format
+    )
+    d_write2 <- obj$dragenObj$write(
+      d_tidy2,
       outdir = outdir_tidy,
       prefix = prefix,
       format = format
     )
-    return(d_write)
+    return(dplyr::bind_rows(d_write1, d_write2))
   }
-  return(d_dl)
+  return(dplyr::bind_rows(d_dl1, d_dl2))
 }
diff --git a/man/Wf_tso_ctdna_tumor_only_v2.Rd b/man/Wf_tso_ctdna_tumor_only_v2.Rd
index 55e998d..fb84a29 100644
--- a/man/Wf_tso_ctdna_tumor_only_v2.Rd
+++ b/man/Wf_tso_ctdna_tumor_only_v2.Rd
@@ -17,11 +17,21 @@ p <- file.path(
 prefix <- "L2401290"
 t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix)
 t1$list_files(max_files = 100)
+t1$dragenObj$list_files(max_files = 100)
 t1$list_files_filter_relevant(max_files = 300)
-d <- t1$download_files(max_files = 100, dryrun = F)
-d_tidy <- t1$tidy_files(d)
-d_write <- t1$write(
-  d_tidy,
+t1$dragenObj$list_files_filter_relevant(max_files = 300)
+d1 <- t1$download_files(max_files = 100, dryrun = F)
+d2 <- t1$dragenObj$download_files(max_files = 100, dryrun = F)
+d1_tidy <- t1$tidy_files(d1)
+d2_tidy <- t1$dragenObj$tidy_files(d2)
+d_write1 <- t1$write(
+  d1_tidy,
+  outdir = file.path(p, "dracarys_tidy"),
+  prefix = prefix,
+  format = "tsv"
+)
+d_write2 <- t1$dragenObj$write(
+  d2_tidy,
   outdir = file.path(p, "dracarys_tidy"),
   prefix = prefix,
   format = "tsv"
@@ -58,6 +68,8 @@ d_write <- t2$write(
 \if{html}{\out{<div class="r6-fields">}}
 \describe{
 \item{\code{prefix}}{The LibraryID prefix of the tumor sample (needed for path lookup).}
+
+\item{\code{dragenObj}}{dragen object.}
 }
 \if{html}{\out{</div>}}
 }
diff --git a/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd b/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd
index 2a8ebf7..5d9423f 100644
--- a/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd
+++ b/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd
@@ -39,6 +39,19 @@ Downloads files from the \code{tso_ctdna_tumor_only_v2} workflow and writes them
 }
 \examples{
 \dontrun{
+#---- Local ----#
+p <- file.path(
+  "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
+  "analysis/cttsov2/20240915ff0295ed"
+)
+prefix <- "L2401290"
+outdir <- file.path(p, "dracarys_tidy")
+d <- dtw_Wf_tso_ctdna_tumor_only_v2(
+  path = p, prefix = prefix, outdir = outdir,
+  format = "tsv",
+  dryrun = F
+)
+
 p <- file.path(
   "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production",
   "analysis/cttsov2/20240915ff0295ed"