From 4eb86661d6293a8b21bafdbe9d481641353ae45c Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 16 Sep 2024 22:03:26 +1000
Subject: [PATCH 1/8] alignqc: re-eval plots

---
 .../umccr_workflows/alignment_qc/summary.Rmd  | 32 ++++++-------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
index f38aa67..7c44233 100644
--- a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
+++ b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
@@ -9,7 +9,7 @@ output:
     highlight: kate
 params:
   title: "UMCCR Alignment QC Summary Report"
-  meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-09_wgts.rds")
+  meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-14_wgts.rds")
 description: "UMCCR Alignment QC Summary Report"
 title: "`r params$title`"
 ---
@@ -457,11 +457,9 @@ plotly::ggplotly(f1_plot)
 
 ### Positional Base Content ('Per-Position Sequence Content')
 
-Skip
-
 - TODO: create heatmap instead
 
-```{r fqc_pbc, eval=F, fig.height=42}
+```{r fqc_pbc, eval=T, fig.height=42}
 f1 <- dr_unnest("FastqcMetricsFile_positional_base_content")
 f1 |>
   filter(base != "N") |>
@@ -482,9 +480,8 @@ f1 |>
 
 ### Positional Base Mean Quality ('Per-Position Mean Quality Scores')
 
-Skip
 
-```{r fqc_bmq, eval=F, fig.height=80}
+```{r fqc_bmq, eval=T, fig.height=80}
 f1 <- dr_unnest("FastqcMetricsFile_positional_base_mean_quality")
 ggplot() +
   geom_rect(
@@ -508,9 +505,7 @@ ggplot() +
 
 ### Positional Quality ('Per-Position Quality Score Ranges')
 
-Skip
-
-```{r fqc_pq, eval=FALSE, fig.width=13}
+```{r fqc_pq, eval=T, fig.width=13}
 # TODO: use boxplot instead of point
 f1 <- dr_unnest("FastqcMetricsFile_positional_quality")
 quants <- c(25, 50, 75)
@@ -553,9 +548,8 @@ plotly::ggplotly(read_len_plot)
 
 ### Sequence Positions ('Adapter Content')
 
-Skip
 
-```{r seq_pos, eval=F, fig.height=42}
+```{r seq_pos, eval=T, fig.height=42}
 f1 <- dr_unnest("FastqcMetricsFile_sequence_positions")
 f1 |>
   ggplot(aes(x = bp, y = value, colour = seq)) +
@@ -571,9 +565,7 @@ f1 |>
 
 ## Coverage {.tabset .tabset-pills}
 
-Skip
-
-```{r contig_cvg, eval=FALSE, results='asis', fig.height=5}
+```{r contig_cvg, eval=T, results='asis', fig.height=5}
 d1 <- dr_unnest("WgsContigMeanCovFile") |>
   arrange(desc("umccrid"))
 for (type1 in sort(unique(d1$type), decreasing = FALSE)) {
@@ -624,9 +616,7 @@ plotly::ggplotly(flp)
 
 - Only for WGS.
 
-Skip
-
-```{r pe, eval=F, fig.height=5}
+```{r pe, eval=T, fig.height=5}
 chrom_levels <- c(1:22, "x", "y")
 d_pl_plot_data <- d_pl |>
   select(
@@ -651,9 +641,7 @@ plotly::ggplotly(d_pl_plot)
 
 ## Hist
 
-Skip
-
-```{r cvgm, eval=F, fig.height=8, fig.width=12}
+```{r cvgm, eval=T, fig.height=8, fig.width=12}
 d_hist <- dr_unnest("WgsHistFile")
 d_hist1 <- d_hist |>
   ggplot(aes(x = start, y = pct, colour = umccrid)) +
@@ -682,9 +670,7 @@ plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, n
 
 ## FineHist
 
-Skip
-
-```{r finehist, eval=FALSE, fig.height=10, fig.width=12}
+```{r finehist, eval=T, fig.height=10, fig.width=12}
 d_fhist <- dr_unnest("WgsFineHistFile")
 d_fhist |>
   dracarys::WgsFineHistFile$public_methods$plot(c(0, 150)) +

From 1b127ceef336345eb0d2af06c23351645345cbea Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 16 Sep 2024 23:08:50 +1000
Subject: [PATCH 2/8] init tso R6 restructure

---
 R/tso.R | 68 +++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git a/R/tso.R b/R/tso.R
index 22f1d74..528716a 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -1,28 +1,72 @@
-#' tso_ctdna_tumor_only Wf R6 Class
+#' Wf_tso_ctdna_tumor_only R6 Class
 #'
 #' @description
-#' Contains methods for reading and processing files output from the UMCCR
-#' `tso_ctdna_tumor_only` workflow.
+#' Reads and writes tidy versions of files from the `tso_ctdna_tumor_only` workflow.
 #'
 #' @examples
 #' \dontrun{
+#'
+#' #---- Local ----#
 #' x <- file.path(
 #'   "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only",
-#'   "2024050555972acf/L2400482/Results/PTC_ctTSO240429_L2400482/dracarys_gds_sync"
+#'   "2024050555972acf/L2400482/Results"
 #' )
-#' sample_id <- "PTC_ctTSO240429"
-#' library_id <- "L2400482"
-#' d <- TsoCombinedVariantOutputFile$new(x)
-#' d$read()
+#' SampleID <- "PTC_ctTSO240429"
+#' LibraryID <- "L2400482"
 #' }
 #' @export
 Wf_tso_ctdna_tumor_only <- R6::R6Class(
   "Wf_tso_ctdna_tumor_only",
+  inherit = Wf,
   public = list(
-    #' @field sid SampleID.
-    #' @field lid LibraryID.
-    sid = NULL,
-    lid = NULL
+    #' @field SampleID The SampleID of the tumor sample (needed for path lookup).
+    #' @field LibraryID The LibraryID of the tumor sample (needed for path lookup).
+    SampleID = NULL,
+    LibraryID = NULL,
+    #' @description Create a new Wf_tso_ctdna_tumor_only object.
+    #' @param path Path to directory with raw workflow results (from GDS, S3, or
+    #' local filesystem).
+    #' @param SampleID The SampleID of the tumor sample (needed for path lookup).
+    #' @param LibraryID The LibraryID of the tumor sample (needed for path lookup).
+    initialize = function(path = NULL, SampleID = NULL, LibraryID = NULL) {
+      wname <- "tso_ctdna_tumor_only"
+      pref <- glue("{SampleID}_{LibraryID}")
+      regexes <- tibble::tribble(
+        ~regex, ~fun,
+        glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "TsoAlignCollapseFusionCallerMetricsFile",
+        glue("{pref}/{pref}.TargetRegionCoverage\\.json\\.gz$"), "TsoTargetRegionCoverageFile",
+        glue("{pref}/{pref}.fragment_length_hist\\.json\\.gz$"), "TsoFragmentLengthHistFile",
+        glue("{pref}/{pref}.msi\\.json\\.gz$"), "TsoMsiFile",
+        glue("{pref}/{pref}.tmb\\.json\\.gz$"), "TsoTmbFile",
+        glue("{pref}/{pref}.TMB_Trace\\.tsv$"), "TsoTmbTraceTsvFile",
+        glue("{pref}/{pref}._Fusions\\.csv$"), "TsoFusionsCsvFile",
+        glue("{pref}/{pref}.SampleAnalysisResults\\.json\\.gz$"), "TsoSampleAnalysisResultsFile",
+        glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz$"), "TsoMergedSmallVariantsVcfFile",
+        glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "TsoMergedSmallVariantsVcfIndexFile",
+        glue("CopyNumberVariants\\.vcf\\.gz$"), "TsoCopyNumberVariantsVcfFile",
+        glue("CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "TsoCopyNumberVariantsVcfIndexFile",
+        glue("CombinedVariantOutput\\.tsv$"), "TsoCombinedVariantOutputFile",
+      ) |>
+        dplyr::mutate(fun = paste0("read_", .data$fun))
+
+      super$initialize(path = path, wname = wname, regexes = regexes)
+      self$SampleID <- SampleID
+      self$LibraryID <- LibraryID
+    },
+    #' @description Print details about the Workflow.
+    #' @param ... (ignored).
+    print = function(...) {
+      res <- tibble::tribble(
+        ~var, ~value,
+        "path", self$path,
+        "wname", self$wname,
+        "filesystem", self$filesystem,
+        "SampleID", self$SampleID,
+        "LibraryID", self$LibraryID
+      )
+      print(res)
+      invisible(self)
+    },
   )
 )
 

From cd408a52ac9328b2014ac839c46f4f4abb15831f Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Wed, 18 Sep 2024 17:39:06 +1000
Subject: [PATCH 3/8] tso: copy over R6 parsers

---
 R/tidy.R                       |   1 +
 R/tso.R                        | 132 ++++++++++++--
 man/Wf_tso_ctdna_tumor_only.Rd | 321 +++++++++++++++++++++++++++++++--
 3 files changed, 422 insertions(+), 32 deletions(-)

diff --git a/R/tidy.R b/R/tidy.R
index 06fac82..924620d 100644
--- a/R/tidy.R
+++ b/R/tidy.R
@@ -19,6 +19,7 @@ tidy_files <- function(x, envir = parent.frame()) {
   assertthat::assert_that(is.data.frame(x))
   assertthat::assert_that(all(c("type", "localpath") %in% colnames(x)))
   x |>
+    dplyr::filter(.data$type != "DOWNLOAD_ONLY") |>
     dplyr::rowwise() |>
     dplyr::mutate(
       data = list(dr_func_eval(f = .data$type, v = .data$type, envir = envir)(.data$localpath))
diff --git a/R/tso.R b/R/tso.R
index 528716a..df12e71 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -7,12 +7,39 @@
 #' \dontrun{
 #'
 #' #---- Local ----#
-#' x <- file.path(
-#'   "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only",
-#'   "2024050555972acf/L2400482/Results"
+#' p <- file.path(
+#'   "~/icav1/g/production/analysis_data/SBJ04651/tso_ctdna_tumor_only",
+#'   "20240223d1951163/L2400183/Results"
+#' )
+#' SampleID <- "PRJ230876"
+#' LibraryID <- "L2400183"
+#' prefix <- glue("{SampleID}__{LibraryID}")
+#' t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
+#' t1$list_files(max_files = 20)
+#' t1$list_files_filter_relevant(max_files = 300)
+#'
+#' #---- GDS ----#
+#' p <- file.path(
+#'   "gds://production/analysis_data/SBJ04651/tso_ctdna_tumor_only",
+#'   "20240223d1951163/L2400183/Results"
+#' )
+#'
+#' outdir <- file.path(sub("gds:/", "~/icav1/g", p))
+#' token <- Sys.getenv("ICA_ACCESS_TOKEN")
+#' t2 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
+#' t2$list_files(max_files = 100)
+#' t2$list_files_filter_relevant(max_files = 100)
+#' d <- t2$download_files(
+#'   outdir = outdir, ica_token = token,
+#'   max_files = 100, dryrun = F
+#' )
+#' d_tidy <- t2$tidy_files(d)
+#' d_write <- t2$write(
+#'   d_tidy,
+#'   outdir = file.path(p, "dracarys_tidy"),
+#'   prefix = prefix,
+#'   format = "tsv"
 #' )
-#' SampleID <- "PTC_ctTSO240429"
-#' LibraryID <- "L2400482"
 #' }
 #' @export
 Wf_tso_ctdna_tumor_only <- R6::R6Class(
@@ -33,21 +60,26 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
       pref <- glue("{SampleID}_{LibraryID}")
       regexes <- tibble::tribble(
         ~regex, ~fun,
-        glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "TsoAlignCollapseFusionCallerMetricsFile",
-        glue("{pref}/{pref}.TargetRegionCoverage\\.json\\.gz$"), "TsoTargetRegionCoverageFile",
-        glue("{pref}/{pref}.fragment_length_hist\\.json\\.gz$"), "TsoFragmentLengthHistFile",
-        glue("{pref}/{pref}.msi\\.json\\.gz$"), "TsoMsiFile",
-        glue("{pref}/{pref}.tmb\\.json\\.gz$"), "TsoTmbFile",
-        glue("{pref}/{pref}.TMB_Trace\\.tsv$"), "TsoTmbTraceTsvFile",
-        glue("{pref}/{pref}._Fusions\\.csv$"), "TsoFusionsCsvFile",
-        glue("{pref}/{pref}.SampleAnalysisResults\\.json\\.gz$"), "TsoSampleAnalysisResultsFile",
-        glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz$"), "TsoMergedSmallVariantsVcfFile",
-        glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "TsoMergedSmallVariantsVcfIndexFile",
-        glue("CopyNumberVariants\\.vcf\\.gz$"), "TsoCopyNumberVariantsVcfFile",
-        glue("CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "TsoCopyNumberVariantsVcfIndexFile",
-        glue("CombinedVariantOutput\\.tsv$"), "TsoCombinedVariantOutputFile",
+        glue("{pref}/{pref}.SampleAnalysisResults\\.json\\.gz$"), "sar",
+        glue("{pref}/{pref}_TMB_Trace\\.tsv$"), "tmbt",
+        glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "acfc",
+        glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz$"), "msv",
+        glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
+        glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz$"), "DOWNLOAD_ONLY",
+        glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
+        glue("{pref}/{pref}_CombinedVariantOutput\\.tsv$"), "cvo",
+        glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz$"), "cnv",
+        glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
+        glue("{pref}/{pref}.fragment_length_hist\\.json\\.gz$"), "flh",
+        glue("{pref}/{pref}.TargetRegionCoverage\\.json\\.gz$"), "trc",
+        glue("{pref}/{pref}.tmb\\.json\\.gz$"), "tmb",
+        glue("{pref}/{pref}.msi\\.json\\.gz$"), "msi",
+        glue("{pref}/{pref}_Fusions\\.csv$"), "fus"
       ) |>
-        dplyr::mutate(fun = paste0("read_", .data$fun))
+        dplyr::mutate(
+          fun = paste0("read_", .data$fun),
+          fun = ifelse(.data$fun == "read_DOWNLOAD_ONLY", "DOWNLOAD_ONLY", .data$fun)
+        )
 
       super$initialize(path = path, wname = wname, regexes = regexes)
       self$SampleID <- SampleID
@@ -67,7 +99,67 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
       print(res)
       invisible(self)
     },
-  )
+    #' @description Read `SampleAnalysisResults.json.gz` file.
+    #' @param x Path to file.
+    read_sar = function(x) {
+      TsoSampleAnalysisResultsFile$new(x)$read()
+    },
+    #' @description Read `TMB_Trace.tsv` file.
+    #' @param x Path to file.
+    read_tmbt = function(x) {
+      TsoTmbTraceTsvFile$new(x)$read()
+    },
+    #' @description Read `AlignCollapseFusionCaller_metrics.json.gz` file.
+    #' @param x Path to file.
+    read_acfc = function(x) {
+      TsoAlignCollapseFusionCallerMetricsFile$new(x)$read()
+    },
+    #' @description Read `MergedSmallVariants.vcf.gz` file.
+    #' @param x Path to file.
+    read_msv = function(x) {
+      TsoMergedSmallVariantsVcfFile$new(x)$read()
+    },
+    #' @description Read `MergedSmallVariants.genome.vcf.gz` file.
+    #' @param x Path to file.
+    read_msvg = function(x) {
+      TsoMergedSmallVariantsGenomeVcfFile$new(x)$read()
+    },
+    #' @description Read `CombinedVariantOutput.tsv` file.
+    #' @param x Path to file.
+    read_cvo = function(x) {
+      TsoCombinedVariantOutputFile$new(x)$read()
+    },
+    #' @description Read `CopyNumberVariants.vcf.gz` file.
+    #' @param x Path to file.
+    read_cnv = function(x) {
+      TsoCopyNumberVariantsVcfFile$new(x)$read()
+    },
+    #' @description Read `fragment_length_hist.json.gz` file.
+    #' @param x Path to file.
+    read_flh = function(x) {
+      TsoFragmentLengthHistFile$new(x)$read()
+    },
+    #' @description Read `TargetRegionCoverage.json.gz` file.
+    #' @param x Path to file.
+    read_trc = function(x) {
+      TsoTargetRegionCoverageFile$new(x)$read()
+    },
+    #' @description Read `tmb.json.gz` file.
+    #' @param x Path to file.
+    read_tmb = function(x) {
+      TsoTmbFile$new(x)$read()
+    },
+    #' @description Read `msi.json.gz` file.
+    #' @param x Path to file.
+    read_msi = function(x) {
+      TsoMsiFile$new(x)$read()
+    },
+    #' @description Read `Fusions.csv` file.
+    #' @param x Path to file.
+    read_fus = function(x) {
+      TsoFusionsCsvFile$new(x)$read()
+    }
+  ) # end public
 )
 
 #' TsoCombinedVariantOutputFile R6 Class
diff --git a/man/Wf_tso_ctdna_tumor_only.Rd b/man/Wf_tso_ctdna_tumor_only.Rd
index 0af943d..1c3a807 100644
--- a/man/Wf_tso_ctdna_tumor_only.Rd
+++ b/man/Wf_tso_ctdna_tumor_only.Rd
@@ -2,38 +2,335 @@
 % Please edit documentation in R/tso.R
 \name{Wf_tso_ctdna_tumor_only}
 \alias{Wf_tso_ctdna_tumor_only}
-\title{tso_ctdna_tumor_only Wf R6 Class}
+\title{Wf_tso_ctdna_tumor_only R6 Class}
 \description{
-Contains methods for reading and processing files output from the UMCCR
-\code{tso_ctdna_tumor_only} workflow.
+Reads and writes tidy versions of files from the \code{tso_ctdna_tumor_only} workflow.
 }
 \examples{
 \dontrun{
-x <- file.path(
-  "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only",
-  "2024050555972acf/L2400482/Results/PTC_ctTSO240429_L2400482/dracarys_gds_sync"
+
+#---- Local ----#
+p <- file.path(
+  "~/icav1/g/production/analysis_data/SBJ04651/tso_ctdna_tumor_only",
+  "20240223d1951163/L2400183/Results"
+)
+SampleID <- "PRJ230876"
+LibraryID <- "L2400183"
+prefix <- glue("{SampleID}__{LibraryID}")
+t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
+t1$list_files(max_files = 20)
+t1$list_files_filter_relevant(max_files = 300)
+
+#---- GDS ----#
+p <- file.path(
+  "gds://production/analysis_data/SBJ04651/tso_ctdna_tumor_only",
+  "20240223d1951163/L2400183/Results"
+)
+
+outdir <- file.path(sub("gds:/", "~/icav1/g", p))
+token <- Sys.getenv("ICA_ACCESS_TOKEN")
+t2 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
+t2$list_files(max_files = 100)
+t2$list_files_filter_relevant(max_files = 100)
+d <- t2$download_files(
+  outdir = outdir, ica_token = token,
+  max_files = 100, dryrun = F
+)
+d_tidy <- t2$tidy_files(d)
+d_write <- t2$write(
+  d_tidy,
+  outdir = file.path(p, "dracarys_tidy"),
+  prefix = prefix,
+  format = "tsv"
 )
-sample_id <- "PTC_ctTSO240429"
-library_id <- "L2400482"
-d <- TsoCombinedVariantOutputFile$new(x)
-d$read()
 }
 }
+\section{Super class}{
+\code{\link[dracarys:Wf]{dracarys::Wf}} -> \code{Wf_tso_ctdna_tumor_only}
+}
 \section{Public fields}{
 \if{html}{\out{<div class="r6-fields">}}
 \describe{
-\item{\code{sid}}{SampleID.}
+\item{\code{SampleID}}{The SampleID of the tumor sample (needed for path lookup).}
 
-\item{\code{lid}}{LibraryID.}
+\item{\code{LibraryID}}{The LibraryID of the tumor sample (needed for path lookup).}
 }
 \if{html}{\out{</div>}}
 }
 \section{Methods}{
 \subsection{Public methods}{
 \itemize{
+\item \href{#method-Wf_tso_ctdna_tumor_only-new}{\code{Wf_tso_ctdna_tumor_only$new()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-print}{\code{Wf_tso_ctdna_tumor_only$print()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_sar}{\code{Wf_tso_ctdna_tumor_only$read_sar()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_tmbt}{\code{Wf_tso_ctdna_tumor_only$read_tmbt()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_acfc}{\code{Wf_tso_ctdna_tumor_only$read_acfc()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_msv}{\code{Wf_tso_ctdna_tumor_only$read_msv()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_msvg}{\code{Wf_tso_ctdna_tumor_only$read_msvg()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_cvo}{\code{Wf_tso_ctdna_tumor_only$read_cvo()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_cnv}{\code{Wf_tso_ctdna_tumor_only$read_cnv()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_flh}{\code{Wf_tso_ctdna_tumor_only$read_flh()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_trc}{\code{Wf_tso_ctdna_tumor_only$read_trc()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_tmb}{\code{Wf_tso_ctdna_tumor_only$read_tmb()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_msi}{\code{Wf_tso_ctdna_tumor_only$read_msi()}}
+\item \href{#method-Wf_tso_ctdna_tumor_only-read_fus}{\code{Wf_tso_ctdna_tumor_only$read_fus()}}
 \item \href{#method-Wf_tso_ctdna_tumor_only-clone}{\code{Wf_tso_ctdna_tumor_only$clone()}}
 }
 }
+\if{html}{\out{
+<details open><summary>Inherited methods</summary>
+<ul>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="download_files"><a href='../../dracarys/html/Wf.html#method-Wf-download_files'><code>dracarys::Wf$download_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files"><a href='../../dracarys/html/Wf.html#method-Wf-list_files'><code>dracarys::Wf$list_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="list_files_filter_relevant"><a href='../../dracarys/html/Wf.html#method-Wf-list_files_filter_relevant'><code>dracarys::Wf$list_files_filter_relevant()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="tidy_files"><a href='../../dracarys/html/Wf.html#method-Wf-tidy_files'><code>dracarys::Wf$tidy_files()</code></a></span></li>
+<li><span class="pkg-link" data-pkg="dracarys" data-topic="Wf" data-id="write"><a href='../../dracarys/html/Wf.html#method-Wf-write'><code>dracarys::Wf$write()</code></a></span></li>
+</ul>
+</details>
+}}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-new"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-new}{}}}
+\subsection{Method \code{new()}}{
+Create a new Wf_tso_ctdna_tumor_only object.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$new(path = NULL, SampleID = NULL, LibraryID = NULL)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{path}}{Path to directory with raw workflow results (from GDS, S3, or
+local filesystem).}
+
+\item{\code{SampleID}}{The SampleID of the tumor sample (needed for path lookup).}
+
+\item{\code{LibraryID}}{The LibraryID of the tumor sample (needed for path lookup).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-print"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-print}{}}}
+\subsection{Method \code{print()}}{
+Print details about the Workflow.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$print(...)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{...}}{(ignored).}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_sar"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_sar}{}}}
+\subsection{Method \code{read_sar()}}{
+Read \code{SampleAnalysisResults.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_sar(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_tmbt"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_tmbt}{}}}
+\subsection{Method \code{read_tmbt()}}{
+Read \code{TMB_Trace.tsv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_tmbt(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_acfc"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_acfc}{}}}
+\subsection{Method \code{read_acfc()}}{
+Read \code{AlignCollapseFusionCaller_metrics.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_acfc(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_msv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_msv}{}}}
+\subsection{Method \code{read_msv()}}{
+Read \code{MergedSmallVariants.vcf.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_msv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_msvg"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_msvg}{}}}
+\subsection{Method \code{read_msvg()}}{
+Read \code{MergedSmallVariants.genome.vcf.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_msvg(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_cvo"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_cvo}{}}}
+\subsection{Method \code{read_cvo()}}{
+Read \code{CombinedVariantOutput.tsv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_cvo(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_cnv"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_cnv}{}}}
+\subsection{Method \code{read_cnv()}}{
+Read \code{CopyNumberVariants.vcf.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_cnv(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_flh"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_flh}{}}}
+\subsection{Method \code{read_flh()}}{
+Read \code{fragment_length_hist.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_flh(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_trc"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_trc}{}}}
+\subsection{Method \code{read_trc()}}{
+Read \code{TargetRegionCoverage.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_trc(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_tmb"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_tmb}{}}}
+\subsection{Method \code{read_tmb()}}{
+Read \code{tmb.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_tmb(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_msi"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_msi}{}}}
+\subsection{Method \code{read_msi()}}{
+Read \code{msi.json.gz} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_msi(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-read_fus"></a>}}
+\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_fus}{}}}
+\subsection{Method \code{read_fus()}}{
+Read \code{Fusions.csv} file.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Wf_tso_ctdna_tumor_only$read_fus(x)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{x}}{Path to file.}
+}
+\if{html}{\out{</div>}}
+}
+}
 \if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-Wf_tso_ctdna_tumor_only-clone"></a>}}
 \if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-clone}{}}}

From 6cd5a7c9858671e0fccc4047115c04d6632f90a4 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Fri, 20 Sep 2024 15:47:52 +1000
Subject: [PATCH 4/8] tso: output tidy data in list col

---
 R/tso.R | 56 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/R/tso.R b/R/tso.R
index df12e71..883e820 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -17,6 +17,8 @@
 #' t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
 #' t1$list_files(max_files = 20)
 #' t1$list_files_filter_relevant(max_files = 300)
+#' d <- t1$download_files(max_files = 100, dryrun = F)
+#' d_tidy <- t1$tidy_files(d)
 #'
 #' #---- GDS ----#
 #' p <- file.path(
@@ -65,8 +67,8 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
         glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "acfc",
         glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz$"), "msv",
         glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
-        glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz$"), "DOWNLOAD_ONLY",
-        glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
+        # glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz$"), "DOWNLOAD_ONLY",
+        # glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
         glue("{pref}/{pref}_CombinedVariantOutput\\.tsv$"), "cvo",
         glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz$"), "cnv",
         glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY",
@@ -99,65 +101,91 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
       print(res)
       invisible(self)
     },
+    #' @description Tidy given files.
+    #' @param x Tibble with `localpath` to file and the function `type` to parse it.
+    tidy_files = function(x) {
+      assertthat::assert_that(is.data.frame(x))
+      assertthat::assert_that(all(c("type", "localpath") %in% colnames(x)))
+      d1 <- x |>
+        dplyr::filter(.data$type != "DOWNLOAD_ONLY") |>
+        dplyr::rowwise() |>
+        dplyr::mutate(
+          data = list(dr_func_eval(f = .data$type, v = .data$type, envir = self)(.data$localpath))
+        ) |>
+        dplyr::ungroup()
+      d1
+    },
     #' @description Read `SampleAnalysisResults.json.gz` file.
     #' @param x Path to file.
     read_sar = function(x) {
-      TsoSampleAnalysisResultsFile$new(x)$read()
+      TsoSampleAnalysisResultsFile$new(x)$read() |>
+        tibble::enframe(name = "name", value = "data")
     },
     #' @description Read `TMB_Trace.tsv` file.
     #' @param x Path to file.
     read_tmbt = function(x) {
-      TsoTmbTraceTsvFile$new(x)$read()
+      dat <- TsoTmbTraceTsvFile$new(x)$read()
+      tibble::tibble(name = "tmb_trace", data = list(dat))
     },
     #' @description Read `AlignCollapseFusionCaller_metrics.json.gz` file.
     #' @param x Path to file.
     read_acfc = function(x) {
-      TsoAlignCollapseFusionCallerMetricsFile$new(x)$read()
+      TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() |>
+        tibble::enframe(name = "name", value = "data")
     },
     #' @description Read `MergedSmallVariants.vcf.gz` file.
     #' @param x Path to file.
     read_msv = function(x) {
-      TsoMergedSmallVariantsVcfFile$new(x)$read()
+      dat <- TsoMergedSmallVariantsVcfFile$new(x)$read()
+      tibble::tibble(name = "merged_smallv", data = list(dat))
     },
     #' @description Read `MergedSmallVariants.genome.vcf.gz` file.
     #' @param x Path to file.
     read_msvg = function(x) {
-      TsoMergedSmallVariantsGenomeVcfFile$new(x)$read()
+      dat <- TsoMergedSmallVariantsGenomeVcfFile$new(x)$read()
+      tibble::tibble(name = "merged_smallvg", data = list(dat))
     },
     #' @description Read `CombinedVariantOutput.tsv` file.
     #' @param x Path to file.
     read_cvo = function(x) {
-      TsoCombinedVariantOutputFile$new(x)$read()
+      dat <- TsoCombinedVariantOutputFile$new(x)$read()
+      tibble::tibble(name = "combined_var", data = list(dat))
     },
     #' @description Read `CopyNumberVariants.vcf.gz` file.
     #' @param x Path to file.
     read_cnv = function(x) {
-      TsoCopyNumberVariantsVcfFile$new(x)$read()
+      dat <- TsoCopyNumberVariantsVcfFile$new(x)$read()
+      tibble::tibble(name = "cnv", data = list(dat))
     },
     #' @description Read `fragment_length_hist.json.gz` file.
     #' @param x Path to file.
     read_flh = function(x) {
-      TsoFragmentLengthHistFile$new(x)$read()
+      dat <- TsoFragmentLengthHistFile$new(x)$read()
+      tibble::tibble(name = "fraglenhist", data = list(dat))
     },
     #' @description Read `TargetRegionCoverage.json.gz` file.
     #' @param x Path to file.
     read_trc = function(x) {
-      TsoTargetRegionCoverageFile$new(x)$read()
+      dat <- TsoTargetRegionCoverageFile$new(x)$read()
+      tibble::tibble(name = "targetcvg", data = list(dat))
     },
     #' @description Read `tmb.json.gz` file.
     #' @param x Path to file.
     read_tmb = function(x) {
-      TsoTmbFile$new(x)$read()
+      dat <- TsoTmbFile$new(x)$read()
+      tibble::tibble(name = "tmb", data = list(dat))
     },
     #' @description Read `msi.json.gz` file.
     #' @param x Path to file.
     read_msi = function(x) {
-      TsoMsiFile$new(x)$read()
+      dat <- TsoMsiFile$new(x)$read()
+      tibble::tibble(name = "msi", data = list(dat))
     },
     #' @description Read `Fusions.csv` file.
     #' @param x Path to file.
     read_fus = function(x) {
-      TsoFusionsCsvFile$new(x)$read()
+      dat <- TsoFusionsCsvFile$new(x)$read()
+      tibble::tibble(name = "fusions", data = list(dat))
     }
   ) # end public
 )

From 185c7aa3b8ba63cbc7603b641d2db8cc1d4471b3 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 21 Sep 2024 10:06:42 +1000
Subject: [PATCH 5/8] remove spec_tbl_df subclass by []

---
 R/tso.R   | 8 +++++---
 R/utils.R | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/tso.R b/R/tso.R
index 883e820..6d2b1bd 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -228,7 +228,7 @@ TsoCombinedVariantOutputFile <- R6::R6Class(
       if (length(smallv) == 0 || ln[(smallv + 2)] == "NA\t\t") {
         return(empty_tbl(names(nm_map)))
       }
-      ln[(smallv + 1):length(ln)] |>
+      d <- ln[(smallv + 1):length(ln)] |>
         I() |> # read parsed data as-is
         readr::read_tsv(
           col_names = TRUE, col_types = readr::cols(
@@ -239,6 +239,7 @@ TsoCombinedVariantOutputFile <- R6::R6Class(
           )
         ) |>
         dplyr::rename(dplyr::any_of(nm_map))
+      d[]
     },
     #' @description
     #' Writes a tidy version of the `CombinedVariantOutput.tsv` (only Small Variants)
@@ -429,7 +430,8 @@ TsoTmbTraceTsvFile <- R6::R6Class(
         GermlineFilterDatabase = "l", GermlineFilterProxi = "l",
         CodingVariant = "l", Nonsynonymous = "l", IncludedInTMBNumerator = "l"
       )
-      readr::read_tsv(x, col_types = ct)
+      d <- readr::read_tsv(x, col_types = ct)
+      d[]
     },
 
     #' @description
@@ -731,7 +733,7 @@ TsoFusionsCsvFile <- R6::R6Class(
       if (nrow(res) == 0) {
         return(empty_tbl(cnames = names(ct)))
       }
-      return(res)
+      return(res[])
     },
 
     #' @description
diff --git a/R/utils.R b/R/utils.R
index 378588b..8691826 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -157,7 +157,8 @@ write_dracarys_list_of_tbls <- function(list_of_tbls, out_dir = NULL, prefix = N
 #' @return A tibble with 0 rows and the given column names.
 #' @export
 empty_tbl <- function(cnames, ctypes = readr::cols(.default = "c")) {
-  readr::read_csv("\n", col_names = cnames, col_types = ctypes)
+  d <- readr::read_csv("\n", col_names = cnames, col_types = ctypes)
+  d[]
 }
 
 read_tsvgz <- function(x, ...) {

From cb0bfbed58658780d063700c1fdc46c0254f6e8f Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 21 Sep 2024 11:45:09 +1000
Subject: [PATCH 6/8] tso: make table names more consistent

---
 R/tso.R      | 18 +++++++++---------
 R/tso_acfc.R | 29 +++++++++++++++++++----------
 R/tso_sar.R  | 18 +++++++++---------
 3 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/R/tso.R b/R/tso.R
index 6d2b1bd..5dc6d14 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -113,43 +113,43 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
           data = list(dr_func_eval(f = .data$type, v = .data$type, envir = self)(.data$localpath))
         ) |>
         dplyr::ungroup()
-      d1
+      d1 |>
+        dplyr::select("data") |>
+        tidyr::unnest("data")
     },
     #' @description Read `SampleAnalysisResults.json.gz` file.
     #' @param x Path to file.
     read_sar = function(x) {
-      TsoSampleAnalysisResultsFile$new(x)$read() |>
-        tibble::enframe(name = "name", value = "data")
+      TsoSampleAnalysisResultsFile$new(x)$read()
     },
     #' @description Read `TMB_Trace.tsv` file.
     #' @param x Path to file.
     read_tmbt = function(x) {
       dat <- TsoTmbTraceTsvFile$new(x)$read()
-      tibble::tibble(name = "tmb_trace", data = list(dat))
+      tibble::tibble(name = "tmbtrace", data = list(dat))
     },
     #' @description Read `AlignCollapseFusionCaller_metrics.json.gz` file.
     #' @param x Path to file.
     read_acfc = function(x) {
-      TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() |>
-        tibble::enframe(name = "name", value = "data")
+      TsoAlignCollapseFusionCallerMetricsFile$new(x)$read()
     },
     #' @description Read `MergedSmallVariants.vcf.gz` file.
     #' @param x Path to file.
     read_msv = function(x) {
       dat <- TsoMergedSmallVariantsVcfFile$new(x)$read()
-      tibble::tibble(name = "merged_smallv", data = list(dat))
+      tibble::tibble(name = "mergedsmallv", data = list(dat))
     },
     #' @description Read `MergedSmallVariants.genome.vcf.gz` file.
     #' @param x Path to file.
     read_msvg = function(x) {
       dat <- TsoMergedSmallVariantsGenomeVcfFile$new(x)$read()
-      tibble::tibble(name = "merged_smallvg", data = list(dat))
+      tibble::tibble(name = "mergedsmallvg", data = list(dat))
     },
     #' @description Read `CombinedVariantOutput.tsv` file.
     #' @param x Path to file.
     read_cvo = function(x) {
       dat <- TsoCombinedVariantOutputFile$new(x)$read()
-      tibble::tibble(name = "combined_var", data = list(dat))
+      tibble::tibble(name = "combinedvaro", data = list(dat))
     },
     #' @description Read `CopyNumberVariants.vcf.gz` file.
     #' @param x Path to file.
diff --git a/R/tso_acfc.R b/R/tso_acfc.R
index 5f726b5..7db5939 100644
--- a/R/tso_acfc.R
+++ b/R/tso_acfc.R
@@ -44,6 +44,12 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
         s1 = c("MappingAligningPerRg", "MappingAligningSummary", "TrimmerStatistics", "CoverageSummary"),
         s2 = c("UmiStatistics", "SvSummary", "RunTime")
       )
+      s1_new <- c(
+        "MappingAligningPerRg" = "acfc_maprg",
+        "MappingAligningSummary" = "acfc_map",
+        "TrimmerStatistics" = "acfc_trim",
+        "CoverageSummary" = "acfc_cvg"
+      )
       secs2 <- unlist(secs, use.names = FALSE)
       secs_in_list <- secs2[secs2 %in% names(j)]
       # just extract the following elements if they exist
@@ -54,9 +60,10 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
       # Pivot all metrics for easier ingestion,
       # and utilise the multiqc parser to rename dirty columns.
       # Keeping each list section separate for flexibility.
-      for (sec in secs$s1) {
+      for (sec in secs[["s1"]]) {
         if (sec %in% names(d)) {
-          d[[sec]] <- d[[sec]] |>
+          new_nm <- s1_new[sec]
+          d[[new_nm]] <- d[[sec]] |>
             tidyr::pivot_longer(cols = c("value", "percent"), names_to = "name1", values_to = "value1") |>
             dplyr::filter(!is.na(.data$value1)) |>
             dplyr::mutate(
@@ -68,11 +75,12 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
             dplyr::mutate(umccr_workflow = "dragen_ctdna") |>
             multiqc_rename_cols() |>
             dplyr::select(-"umccr_workflow")
+          d[[sec]] <- NULL
         }
       }
       if ("UmiStatistics" %in% names(d)) {
         # handle non-hist data
-        d[["UmiStatisticsMain"]] <- d[["UmiStatistics"]] |>
+        d[["acfc_umistats"]] <- d[["UmiStatistics"]] |>
           dplyr::filter(!grepl("Hist", .data$name)) |>
           tidyr::pivot_longer(cols = c("value", "percent"), names_to = "name1", values_to = "value1") |>
           dplyr::filter(!is.na(.data$value1)) |>
@@ -86,7 +94,7 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
           multiqc_rename_cols() |>
           dplyr::select(-"umccr_workflow")
         # handle hist data
-        d[["UmiStatisticsHist"]] <- d[["UmiStatistics"]] |>
+        d[["acfc_umistatshist"]] <- d[["UmiStatistics"]] |>
           dplyr::filter(grepl("Hist", .data$name)) |>
           dplyr::mutate(
             name = sub("Histogram of ", "", .data$name),
@@ -101,26 +109,27 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
         d[["UmiStatistics"]] <- NULL
       }
       if ("SvSummary" %in% names(d)) {
-        d[["SvSummary"]] <- d[["SvSummary"]] |>
+        d[["acfc_svsum"]] <- d[["SvSummary"]] |>
           dplyr::mutate(
             name = sub("Number of (.*) \\(PASS\\)", "\\1", .data$name),
             name = sub("breakend pairs", "bnd_pairs", .data$name),
             value = as.numeric(.data$value)
           ) |>
           tidyr::pivot_wider(names_from = "name", values_from = "value")
+        d[["SvSummary"]] <- NULL
       }
       if ("RunTime" %in% names(d)) {
         # just keep the 'percent' column (number of seconds)
-        d[["RunTime"]] <- d[["RunTime"]] |>
+        d[["acfc_runtime"]] <- d[["RunTime"]] |>
           dplyr::mutate(
             seconds = as.numeric(.data$percent),
             name = tools::toTitleCase(sub("Time ", "", .data$name))
           ) |>
           dplyr::select("name", "seconds") |>
           tidyr::pivot_wider(names_from = "name", values_from = "seconds")
+        d[["RunTime"]] <- NULL
       }
-      # keep as list
-      d
+      tibble::enframe(d, name = "name", value = "data")
     },
 
     #' @description
@@ -167,7 +176,7 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
     #' @param max_num Maximum number to display in both plots.
     #' @return Both histogram plot objects.
     plot = function(d, max_num = 15) {
-      if (is.null(d[["UmiStatisticsHist"]])) {
+      if (is.null(d[["acfc_umistatshist"]])) {
         return(
           list(
             p_num_supporting_fragments = NULL,
@@ -175,7 +184,7 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
           )
         )
       }
-      h <- d[["UmiStatisticsHist"]]
+      h <- d[["acfc_umistatshist"]]
       # 15 seems like a good cutoff for both plots
       p1 <- h |>
         dplyr::filter(
diff --git a/R/tso_sar.R b/R/tso_sar.R
index 511b1f8..dba04c6 100644
--- a/R/tso_sar.R
+++ b/R/tso_sar.R
@@ -123,15 +123,15 @@ TsoSampleAnalysisResultsFile <- R6::R6Class(
           empty_tbl2()
       }
 
-      res <- list(
-        sampleinfo = sampleinfo,
-        qc = qc,
-        swconfds = sw[["data_sources"]],
-        swconfother = sw[["other"]],
-        snv = snvs,
-        cnv = cnvs
-      )
-      res
+      list(
+        sar_sampleinfo = sampleinfo,
+        sar_qc = qc,
+        sar_swconfds = sw[["data_sources"]],
+        sar_swconfother = sw[["other"]],
+        sar_snv = snvs,
+        sar_cnv = cnvs
+      ) |>
+        tibble::enframe(name = "name", value = "data")
     },
     #' @description
     #' Writes a tidy version of the `SampleAnalysisResults.json.gz` file output

From bb854c2124cb699fe8b06b380b0da97773ae8668 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 21 Sep 2024 11:51:52 +1000
Subject: [PATCH 7/8] tidy_files refactor

---
 R/Wf.R                                        |  2 +-
 R/tidy.R                                      | 19 +++++++++++++++----
 R/tso.R                                       | 16 ----------------
 R/tso_acfc.R                                  |  2 +-
 R/tso_sar.R                                   |  2 +-
 ...TsoAlignCollapseFusionCallerMetricsFile.Rd |  2 +-
 man/TsoSampleAnalysisResultsFile.Rd           |  2 +-
 man/Wf.Rd                                     |  2 +-
 man/Wf_tso_ctdna_tumor_only.Rd                |  2 ++
 man/tidy_files.Rd                             | 16 +++++++++++-----
 10 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/R/Wf.R b/R/Wf.R
index 3e00027..6863046 100644
--- a/R/Wf.R
+++ b/R/Wf.R
@@ -200,7 +200,7 @@ Wf <- R6::R6Class(
       tidy_files(x, envir = self)
     },
     #' @description Write tidy data.
-    #' @param x Tibble with tidy `data` and file `type`.
+    #' @param x Tibble with tidy `data` list-column.
     #' @param outdir Directory path to output tidy files.
     #' @param prefix Prefix of output files.
     #' @param format Format of output files.
diff --git a/R/tidy.R b/R/tidy.R
index 924620d..a8c4780 100644
--- a/R/tidy.R
+++ b/R/tidy.R
@@ -1,6 +1,10 @@
 #' Tidy Files
 #'
+#' Tidies files into a tibble with parsed data.
+#'
 #' @param x Tibble with `localpath` to file and the function `type` to parse it.
+#' The function must return a tibble with a `name` column and the tidied `data`
+#' as a list-column (see example).
 #' @param envir the environment in which to evaluate the function e.g. use `self`
 #' when using inside R6 classes.
 #'
@@ -8,9 +12,13 @@
 #' @examples
 #' \dontrun{
 #' p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043"
-#' p2 <- "L2101566__L2101565/SBJ01155__PRJ211091-qc_summary.tsv.gz"
-#' p <- file.path(p1, p2)
-#' x <- tibble::tibble(type = "readr::read_tsv", localpath = p)
+#' p2 <- "L2101566__L2101565/SBJ01155__PRJ211091/cancer_report_tables"
+#' p <- file.path(p1, p2, "SBJ01155__PRJ211091-qc_summary.tsv.gz")
+#' fun <- function(x) {
+#'   d <- readr::read_tsv(x)
+#'   tibble::tibble(name = "table1", data = list(d[]))
+#' }
+#' x <- tibble::tibble(type = "fun", localpath = p)
 #' tidy_files(x)
 #' }
 #'
@@ -23,7 +31,10 @@ tidy_files <- function(x, envir = parent.frame()) {
     dplyr::rowwise() |>
     dplyr::mutate(
       data = list(dr_func_eval(f = .data$type, v = .data$type, envir = envir)(.data$localpath))
-    )
+    ) |>
+    dplyr::ungroup() |>
+    dplyr::select("data") |>
+    tidyr::unnest("data")
 }
 
 #' Tidy UMCCR Results
diff --git a/R/tso.R b/R/tso.R
index 5dc6d14..65fb371 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -101,22 +101,6 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class(
       print(res)
       invisible(self)
     },
-    #' @description Tidy given files.
-    #' @param x Tibble with `localpath` to file and the function `type` to parse it.
-    tidy_files = function(x) {
-      assertthat::assert_that(is.data.frame(x))
-      assertthat::assert_that(all(c("type", "localpath") %in% colnames(x)))
-      d1 <- x |>
-        dplyr::filter(.data$type != "DOWNLOAD_ONLY") |>
-        dplyr::rowwise() |>
-        dplyr::mutate(
-          data = list(dr_func_eval(f = .data$type, v = .data$type, envir = self)(.data$localpath))
-        ) |>
-        dplyr::ungroup()
-      d1 |>
-        dplyr::select("data") |>
-        tidyr::unnest("data")
-    },
     #' @description Read `SampleAnalysisResults.json.gz` file.
     #' @param x Path to file.
     read_sar = function(x) {
diff --git a/R/tso_acfc.R b/R/tso_acfc.R
index 7db5939..ec56966 100644
--- a/R/tso_acfc.R
+++ b/R/tso_acfc.R
@@ -10,7 +10,7 @@
 #' )
 #' m <- TsoAlignCollapseFusionCallerMetricsFile$new(x)
 #' d_parsed <- m$read() # or read(m)
-#' m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds"))
+#' # m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds"))
 #' @export
 TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class(
   "TsoAlignCollapseFusionCallerMetricsFile",
diff --git a/R/tso_sar.R b/R/tso_sar.R
index dba04c6..d691ffa 100644
--- a/R/tso_sar.R
+++ b/R/tso_sar.R
@@ -8,7 +8,7 @@
 #' x <- system.file("extdata/tso/sample705_SampleAnalysisResults.json.gz", package = "dracarys")
 #' res <- TsoSampleAnalysisResultsFile$new(x)
 #' d_parsed <- res$read() # or read(res)
-#' res$write(d_parsed, tempfile(), "tsv")
+#' # res$write(d_parsed, tempfile(), "tsv")
 #' @export
 TsoSampleAnalysisResultsFile <- R6::R6Class(
   "TsoSampleAnalysisResultsFile",
diff --git a/man/TsoAlignCollapseFusionCallerMetricsFile.Rd b/man/TsoAlignCollapseFusionCallerMetricsFile.Rd
index 20ea0d9..b450a8e 100644
--- a/man/TsoAlignCollapseFusionCallerMetricsFile.Rd
+++ b/man/TsoAlignCollapseFusionCallerMetricsFile.Rd
@@ -13,7 +13,7 @@ x <- system.file("extdata/tso/sample705.AlignCollapseFusionCaller_metrics.json.g
 )
 m <- TsoAlignCollapseFusionCallerMetricsFile$new(x)
 d_parsed <- m$read() # or read(m)
-m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds"))
+#m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds"))
 }
 \section{Super class}{
 \code{\link[dracarys:File]{dracarys::File}} -> \code{TsoAlignCollapseFusionCallerMetricsFile}
diff --git a/man/TsoSampleAnalysisResultsFile.Rd b/man/TsoSampleAnalysisResultsFile.Rd
index b051d8a..5886b18 100644
--- a/man/TsoSampleAnalysisResultsFile.Rd
+++ b/man/TsoSampleAnalysisResultsFile.Rd
@@ -11,7 +11,7 @@ Contains methods for reading and displaying contents of the
 x <- system.file("extdata/tso/sample705_SampleAnalysisResults.json.gz", package = "dracarys")
 res <- TsoSampleAnalysisResultsFile$new(x)
 d_parsed <- res$read() # or read(res)
-res$write(d_parsed, tempfile(), "tsv")
+#res$write(d_parsed, tempfile(), "tsv")
 }
 \section{Super class}{
 \code{\link[dracarys:File]{dracarys::File}} -> \code{TsoSampleAnalysisResultsFile}
diff --git a/man/Wf.Rd b/man/Wf.Rd
index 3005725..b2c99f3 100644
--- a/man/Wf.Rd
+++ b/man/Wf.Rd
@@ -255,7 +255,7 @@ Write tidy data.
 \subsection{Arguments}{
 \if{html}{\out{<div class="arguments">}}
 \describe{
-\item{\code{x}}{Tibble with tidy \code{data} and file \code{type}.}
+\item{\code{x}}{Tibble with tidy \code{data} list-column.}
 
 \item{\code{outdir}}{Directory path to output tidy files.}
 
diff --git a/man/Wf_tso_ctdna_tumor_only.Rd b/man/Wf_tso_ctdna_tumor_only.Rd
index 1c3a807..799b90e 100644
--- a/man/Wf_tso_ctdna_tumor_only.Rd
+++ b/man/Wf_tso_ctdna_tumor_only.Rd
@@ -20,6 +20,8 @@ prefix <- glue("{SampleID}__{LibraryID}")
 t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
 t1$list_files(max_files = 20)
 t1$list_files_filter_relevant(max_files = 300)
+d <- t1$download_files(max_files = 100, dryrun = F)
+d_tidy <- t1$tidy_files(d)
 
 #---- GDS ----#
 p <- file.path(
diff --git a/man/tidy_files.Rd b/man/tidy_files.Rd
index 8d1849b..caff8d2 100644
--- a/man/tidy_files.Rd
+++ b/man/tidy_files.Rd
@@ -7,7 +7,9 @@
 tidy_files(x, envir = parent.frame())
 }
 \arguments{
-\item{x}{Tibble with \code{localpath} to file and the function \code{type} to parse it.}
+\item{x}{Tibble with \code{localpath} to file and the function \code{type} to parse it.
+The function must return a tibble with a \code{name} column and the tidied \code{data}
+as a list-column (see example).}
 
 \item{envir}{the environment in which to evaluate the function e.g. use \code{self}
 when using inside R6 classes.}
@@ -16,14 +18,18 @@ when using inside R6 classes.}
 Tibble with parsed data in a \code{data} list-column.
 }
 \description{
-Tidy Files
+Tidies files into a tibble with parsed data.
 }
 \examples{
 \dontrun{
 p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043"
-p2 <- "L2101566__L2101565/SBJ01155__PRJ211091-qc_summary.tsv.gz"
-p <- file.path(p1, p2)
-x <- tibble::tibble(type = "readr::read_tsv", localpath = p)
+p2 <- "L2101566__L2101565/SBJ01155__PRJ211091/cancer_report_tables"
+p <- file.path(p1, p2, "SBJ01155__PRJ211091-qc_summary.tsv.gz")
+fun <- function(x) {
+  d <- readr::read_tsv(x)
+  tibble::tibble(name = "table1", data = list(d[]))
+}
+x <- tibble::tibble(type = "fun", localpath = p)
 tidy_files(x)
 }
 

From 108c07f64a4f0f8d9f12cbfdf13bf18be423a868 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sat, 21 Sep 2024 12:14:06 +1000
Subject: [PATCH 8/8] Wf: fix writer

---
 R/Wf.R  |  5 ++---
 R/tso.R | 16 ++++++++++++----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/R/Wf.R b/R/Wf.R
index 6863046..128aabf 100644
--- a/R/Wf.R
+++ b/R/Wf.R
@@ -213,12 +213,11 @@ Wf <- R6::R6Class(
       d_write <- x |>
         dplyr::rowwise() |>
         dplyr::mutate(
-          section = sub("read_", "", .data$type),
-          p = glue("{prefix}_{.data$section}"),
+          p = glue("{prefix}_{.data$name}"),
           out = list(write_dracarys(obj = .data$data, prefix = .data$p, out_format = format, drid = drid))
         ) |>
         dplyr::ungroup() |>
-        dplyr::select("section", "data") |>
+        dplyr::select("name", "data") |>
         tibble::deframe()
       invisible(d_write)
     }
diff --git a/R/tso.R b/R/tso.R
index 65fb371..68dcb1e 100644
--- a/R/tso.R
+++ b/R/tso.R
@@ -19,13 +19,21 @@
 #' t1$list_files_filter_relevant(max_files = 300)
 #' d <- t1$download_files(max_files = 100, dryrun = F)
 #' d_tidy <- t1$tidy_files(d)
+#' d_write <- t1$write(
+#'   d_tidy,
+#'   outdir = file.path(p, "dracarys_tidy"),
+#'   prefix = prefix,
+#'   format = "tsv"
+#' )
 #'
 #' #---- GDS ----#
 #' p <- file.path(
-#'   "gds://production/analysis_data/SBJ04651/tso_ctdna_tumor_only",
-#'   "20240223d1951163/L2400183/Results"
+#'   "gds://production/analysis_data/SBJ05563/tso_ctdna_tumor_only",
+#'   "20240914d41300cd/L2401388/Results"
 #' )
-#'
+#' SampleID <- "PRJ241446"
+#' LibraryID <- "L2401388"
+#' prefix <- glue("{SampleID}__{LibraryID}")
 #' outdir <- file.path(sub("gds:/", "~/icav1/g", p))
 #' token <- Sys.getenv("ICA_ACCESS_TOKEN")
 #' t2 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID)
@@ -38,7 +46,7 @@
 #' d_tidy <- t2$tidy_files(d)
 #' d_write <- t2$write(
 #'   d_tidy,
-#'   outdir = file.path(p, "dracarys_tidy"),
+#'   outdir = file.path(outdir, "dracarys_tidy"),
 #'   prefix = prefix,
 #'   format = "tsv"
 #' )