From 4eb86661d6293a8b21bafdbe9d481641353ae45c Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 16 Sep 2024 22:03:26 +1000 Subject: [PATCH 1/8] alignqc: re-eval plots --- .../umccr_workflows/alignment_qc/summary.Rmd | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd index f38aa67..7c44233 100644 --- a/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd +++ b/inst/rmd/umccr_workflows/alignment_qc/summary.Rmd @@ -9,7 +9,7 @@ output: highlight: kate params: title: "UMCCR Alignment QC Summary Report" - meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-09_wgts.rds") + meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-14_wgts.rds") description: "UMCCR Alignment QC Summary Report" title: "`r params$title`" --- @@ -457,11 +457,9 @@ plotly::ggplotly(f1_plot) ### Positional Base Content ('Per-Position Sequence Content') -Skip - - TODO: create heatmap instead -```{r fqc_pbc, eval=F, fig.height=42} +```{r fqc_pbc, eval=T, fig.height=42} f1 <- dr_unnest("FastqcMetricsFile_positional_base_content") f1 |> filter(base != "N") |> @@ -482,9 +480,8 @@ f1 |> ### Positional Base Mean Quality ('Per-Position Mean Quality Scores') -Skip -```{r fqc_bmq, eval=F, fig.height=80} +```{r fqc_bmq, eval=T, fig.height=80} f1 <- dr_unnest("FastqcMetricsFile_positional_base_mean_quality") ggplot() + geom_rect( @@ -508,9 +505,7 @@ ggplot() + ### Positional Quality ('Per-Position Quality Score Ranges') -Skip - -```{r fqc_pq, eval=FALSE, fig.width=13} +```{r fqc_pq, eval=T, fig.width=13} # TODO: use boxplot instead of point f1 <- dr_unnest("FastqcMetricsFile_positional_quality") quants <- c(25, 50, 75) @@ -553,9 +548,8 @@ plotly::ggplotly(read_len_plot) ### Sequence Positions ('Adapter Content') -Skip -```{r seq_pos, eval=F, fig.height=42} +```{r seq_pos, eval=T, fig.height=42} f1 <- 
dr_unnest("FastqcMetricsFile_sequence_positions") f1 |> ggplot(aes(x = bp, y = value, colour = seq)) + @@ -571,9 +565,7 @@ f1 |> ## Coverage {.tabset .tabset-pills} -Skip - -```{r contig_cvg, eval=FALSE, results='asis', fig.height=5} +```{r contig_cvg, eval=T, results='asis', fig.height=5} d1 <- dr_unnest("WgsContigMeanCovFile") |> arrange(desc("umccrid")) for (type1 in sort(unique(d1$type), decreasing = FALSE)) { @@ -624,9 +616,7 @@ plotly::ggplotly(flp) - Only for WGS. -Skip - -```{r pe, eval=F, fig.height=5} +```{r pe, eval=T, fig.height=5} chrom_levels <- c(1:22, "x", "y") d_pl_plot_data <- d_pl |> select( @@ -651,9 +641,7 @@ plotly::ggplotly(d_pl_plot) ## Hist -Skip - -```{r cvgm, eval=F, fig.height=8, fig.width=12} +```{r cvgm, eval=T, fig.height=8, fig.width=12} d_hist <- dr_unnest("WgsHistFile") d_hist1 <- d_hist |> ggplot(aes(x = start, y = pct, colour = umccrid)) + @@ -682,9 +670,7 @@ plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, n ## FineHist -Skip - -```{r finehist, eval=FALSE, fig.height=10, fig.width=12} +```{r finehist, eval=T, fig.height=10, fig.width=12} d_fhist <- dr_unnest("WgsFineHistFile") d_fhist |> dracarys::WgsFineHistFile$public_methods$plot(c(0, 150)) + From 1b127ceef336345eb0d2af06c23351645345cbea Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Mon, 16 Sep 2024 23:08:50 +1000 Subject: [PATCH 2/8] init tso R6 restructure --- R/tso.R | 68 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/R/tso.R b/R/tso.R index 22f1d74..528716a 100644 --- a/R/tso.R +++ b/R/tso.R @@ -1,28 +1,72 @@ -#' tso_ctdna_tumor_only Wf R6 Class +#' Wf_tso_ctdna_tumor_only R6 Class #' #' @description -#' Contains methods for reading and processing files output from the UMCCR -#' `tso_ctdna_tumor_only` workflow. +#' Reads and writes tidy versions of files from the `tso_ctdna_tumor_only` workflow. 
#' #' @examples #' \dontrun{ +#' +#' #---- Local ----# #' x <- file.path( #' "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only", -#' "2024050555972acf/L2400482/Results/PTC_ctTSO240429_L2400482/dracarys_gds_sync" +#' "2024050555972acf/L2400482/Results" #' ) -#' sample_id <- "PTC_ctTSO240429" -#' library_id <- "L2400482" -#' d <- TsoCombinedVariantOutputFile$new(x) -#' d$read() +#' SampleID <- "PTC_ctTSO240429" +#' LibraryID <- "L2400482" #' } #' @export Wf_tso_ctdna_tumor_only <- R6::R6Class( "Wf_tso_ctdna_tumor_only", + inherit = Wf, public = list( - #' @field sid SampleID. - #' @field lid LibraryID. - sid = NULL, - lid = NULL + #' @field SampleID The SampleID of the tumor sample (needed for path lookup). + #' @field LibraryID The LibraryID of the tumor sample (needed for path lookup). + SampleID = NULL, + LibraryID = NULL, + #' @description Create a new Wf_tso_ctdna_tumor_only object. + #' @param path Path to directory with raw workflow results (from GDS, S3, or + #' local filesystem). + #' @param SampleID The SampleID of the tumor sample (needed for path lookup). + #' @param LibraryID The LibraryID of the sample (needed for path lookup). 
+ initialize = function(path = NULL, SampleID = NULL, LibraryID = NULL) { + wname <- "tso_ctdna_tumor_only" + pref <- glue("{SampleID}_{LibraryID}") + regexes <- tibble::tribble( + ~regex, ~fun, + glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "TsoAlignCollapseFusionCallerMetricsFile", + glue("{pref}/{pref}.TargetRegionCoverage\\.json\\.gz$"), "TsoTargetRegionCoverageFile", + glue("{pref}/{pref}.fragment_length_hist\\.json\\.gz$"), "TsoFragmentLengthHistFile", + glue("{pref}/{pref}.msi\\.json\\.gz$"), "TsoMsiFile", + glue("{pref}/{pref}.tmb\\.json\\.gz$"), "TsoTmbFile", + glue("{pref}/{pref}.TMB_Trace\\.tsv$"), "TsoTmbTraceTsvFile", + glue("{pref}/{pref}._Fusions\\.csv$"), "TsoFusionsCsvFile", + glue("{pref}/{pref}.SampleAnalysisResults\\.json\\.gz$"), "TsoSampleAnalysisResultsFile", + glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz$"), "TsoMergedSmallVariantsVcfFile", + glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "TsoMergedSmallVariantsVcfIndexFile", + glue("CopyNumberVariants\\.vcf\\.gz$"), "TsoCopyNumberVariantsVcfFile", + glue("CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "TsoCopyNumberVariantsVcfIndexFile", + glue("CombinedVariantOutput\\.tsv$"), "TsoCombinedVariantOutputFile", + ) |> + dplyr::mutate(fun = paste0("read_", .data$fun)) + + super$initialize(path = path, wname = wname, regexes = regexes) + self$SampleID <- SampleID + self$LibraryID <- LibraryID + }, + #' @description Print details about the Workflow. + #' @param ... (ignored). + print = function(...) 
{ + res <- tibble::tribble( + ~var, ~value, + "path", self$path, + "wname", self$wname, + "filesystem", self$filesystem, + "SampleID", self$SampleID, + "LibraryID", self$LibraryID + ) + print(res) + invisible(self) + }, ) ) From cd408a52ac9328b2014ac839c46f4f4abb15831f Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Wed, 18 Sep 2024 17:39:06 +1000 Subject: [PATCH 3/8] tso: copy over R6 parsers --- R/tidy.R | 1 + R/tso.R | 132 ++++++++++++-- man/Wf_tso_ctdna_tumor_only.Rd | 321 +++++++++++++++++++++++++++++++-- 3 files changed, 422 insertions(+), 32 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 06fac82..924620d 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -19,6 +19,7 @@ tidy_files <- function(x, envir = parent.frame()) { assertthat::assert_that(is.data.frame(x)) assertthat::assert_that(all(c("type", "localpath") %in% colnames(x))) x |> + dplyr::filter(.data$type != "DOWNLOAD_ONLY") |> dplyr::rowwise() |> dplyr::mutate( data = list(dr_func_eval(f = .data$type, v = .data$type, envir = envir)(.data$localpath)) diff --git a/R/tso.R b/R/tso.R index 528716a..df12e71 100644 --- a/R/tso.R +++ b/R/tso.R @@ -7,12 +7,39 @@ #' \dontrun{ #' #' #---- Local ----# -#' x <- file.path( -#' "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only", -#' "2024050555972acf/L2400482/Results" +#' p <- file.path( +#' "~/icav1/g/production/analysis_data/SBJ04651/tso_ctdna_tumor_only", +#' "20240223d1951163/L2400183/Results" +#' ) +#' SampleID <- "PRJ230876" +#' LibraryID <- "L2400183" +#' prefix <- glue("{SampleID}__{LibraryID}") +#' t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) +#' t1$list_files(max_files = 20) +#' t1$list_files_filter_relevant(max_files = 300) +#' +#' #---- GDS ----# +#' p <- file.path( +#' "gds://production/analysis_data/SBJ04651/tso_ctdna_tumor_only", +#' "20240223d1951163/L2400183/Results" +#' ) +#' +#' outdir <- file.path(sub("gds:/", "~/icav1/g", p)) +#' token <- Sys.getenv("ICA_ACCESS_TOKEN") +#' t2 <- 
Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) +#' t2$list_files(max_files = 100) +#' t2$list_files_filter_relevant(max_files = 100) +#' d <- t2$download_files( +#' outdir = outdir, ica_token = token, +#' max_files = 100, dryrun = F +#' ) +#' d_tidy <- t2$tidy_files(d) +#' d_write <- t2$write( +#' d_tidy, +#' outdir = file.path(p, "dracarys_tidy"), +#' prefix = prefix, +#' format = "tsv" #' ) -#' SampleID <- "PTC_ctTSO240429" -#' LibraryID <- "L2400482" #' } #' @export Wf_tso_ctdna_tumor_only <- R6::R6Class( @@ -33,21 +60,26 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( pref <- glue("{SampleID}_{LibraryID}") regexes <- tibble::tribble( ~regex, ~fun, - glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "TsoAlignCollapseFusionCallerMetricsFile", - glue("{pref}/{pref}.TargetRegionCoverage\\.json\\.gz$"), "TsoTargetRegionCoverageFile", - glue("{pref}/{pref}.fragment_length_hist\\.json\\.gz$"), "TsoFragmentLengthHistFile", - glue("{pref}/{pref}.msi\\.json\\.gz$"), "TsoMsiFile", - glue("{pref}/{pref}.tmb\\.json\\.gz$"), "TsoTmbFile", - glue("{pref}/{pref}.TMB_Trace\\.tsv$"), "TsoTmbTraceTsvFile", - glue("{pref}/{pref}._Fusions\\.csv$"), "TsoFusionsCsvFile", - glue("{pref}/{pref}.SampleAnalysisResults\\.json\\.gz$"), "TsoSampleAnalysisResultsFile", - glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz$"), "TsoMergedSmallVariantsVcfFile", - glue("{pref}/{pref}.MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "TsoMergedSmallVariantsVcfIndexFile", - glue("CopyNumberVariants\\.vcf\\.gz$"), "TsoCopyNumberVariantsVcfFile", - glue("CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "TsoCopyNumberVariantsVcfIndexFile", - glue("CombinedVariantOutput\\.tsv$"), "TsoCombinedVariantOutputFile", + glue("{pref}/{pref}.SampleAnalysisResults\\.json\\.gz$"), "sar", + glue("{pref}/{pref}_TMB_Trace\\.tsv$"), "tmbt", + glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "acfc", + glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz$"), "msv", + 
glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", + glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz$"), "DOWNLOAD_ONLY", + glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", + glue("{pref}/{pref}_CombinedVariantOutput\\.tsv$"), "cvo", + glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz$"), "cnv", + glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", + glue("{pref}/{pref}.fragment_length_hist\\.json\\.gz$"), "flh", + glue("{pref}/{pref}.TargetRegionCoverage\\.json\\.gz$"), "trc", + glue("{pref}/{pref}.tmb\\.json\\.gz$"), "tmb", + glue("{pref}/{pref}.msi\\.json\\.gz$"), "msi", + glue("{pref}/{pref}_Fusions\\.csv$"), "fus" ) |> - dplyr::mutate(fun = paste0("read_", .data$fun)) + dplyr::mutate( + fun = paste0("read_", .data$fun), + fun = ifelse(.data$fun == "read_DOWNLOAD_ONLY", "DOWNLOAD_ONLY", .data$fun) + ) super$initialize(path = path, wname = wname, regexes = regexes) self$SampleID <- SampleID @@ -67,7 +99,67 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( print(res) invisible(self) }, - ) + #' @description Read `SampleAnalysisResults.json.gz` file. + #' @param x Path to file. + read_sar = function(x) { + TsoSampleAnalysisResultsFile$new(x)$read() + }, + #' @description Read `TMB_Trace.tsv` file. + #' @param x Path to file. + read_tmbt = function(x) { + TsoTmbTraceTsvFile$new(x)$read() + }, + #' @description Read `AlignCollapseFusionCaller_metrics.json.gz` file. + #' @param x Path to file. + read_acfc = function(x) { + TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() + }, + #' @description Read `MergedSmallVariants.vcf.gz` file. + #' @param x Path to file. + read_msv = function(x) { + TsoMergedSmallVariantsVcfFile$new(x)$read() + }, + #' @description Read `MergedSmallVariants.genome.vcf.gz` file. + #' @param x Path to file. + read_msvg = function(x) { + TsoMergedSmallVariantsGenomeVcfFile$new(x)$read() + }, + #' @description Read `CombinedVariantOutput.tsv` file. 
+ #' @param x Path to file. + read_cvo = function(x) { + TsoCombinedVariantOutputFile$new(x)$read() + }, + #' @description Read `CopyNumberVariants.vcf.gz` file. + #' @param x Path to file. + read_cnv = function(x) { + TsoCopyNumberVariantsVcfFile$new(x)$read() + }, + #' @description Read `fragment_length_hist.json.gz` file. + #' @param x Path to file. + read_flh = function(x) { + TsoFragmentLengthHistFile$new(x)$read() + }, + #' @description Read `TargetRegionCoverage.json.gz` file. + #' @param x Path to file. + read_trc = function(x) { + TsoTargetRegionCoverageFile$new(x)$read() + }, + #' @description Read `tmb.json.gz` file. + #' @param x Path to file. + read_tmb = function(x) { + TsoTmbFile$new(x)$read() + }, + #' @description Read `msi.json.gz` file. + #' @param x Path to file. + read_msi = function(x) { + TsoMsiFile$new(x)$read() + }, + #' @description Read `Fusions.csv` file. + #' @param x Path to file. + read_fus = function(x) { + TsoFusionsCsvFile$new(x)$read() + } + ) # end public ) #' TsoCombinedVariantOutputFile R6 Class diff --git a/man/Wf_tso_ctdna_tumor_only.Rd b/man/Wf_tso_ctdna_tumor_only.Rd index 0af943d..1c3a807 100644 --- a/man/Wf_tso_ctdna_tumor_only.Rd +++ b/man/Wf_tso_ctdna_tumor_only.Rd @@ -2,38 +2,335 @@ % Please edit documentation in R/tso.R \name{Wf_tso_ctdna_tumor_only} \alias{Wf_tso_ctdna_tumor_only} -\title{tso_ctdna_tumor_only Wf R6 Class} +\title{Wf_tso_ctdna_tumor_only R6 Class} \description{ -Contains methods for reading and processing files output from the UMCCR -\code{tso_ctdna_tumor_only} workflow. +Reads and writes tidy versions of files from the \code{tso_ctdna_tumor_only} workflow. 
} \examples{ \dontrun{ -x <- file.path( - "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only", - "2024050555972acf/L2400482/Results/PTC_ctTSO240429_L2400482/dracarys_gds_sync" + +#---- Local ----# +p <- file.path( + "~/icav1/g/production/analysis_data/SBJ04651/tso_ctdna_tumor_only", + "20240223d1951163/L2400183/Results" +) +SampleID <- "PRJ230876" +LibraryID <- "L2400183" +prefix <- glue("{SampleID}__{LibraryID}") +t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) +t1$list_files(max_files = 20) +t1$list_files_filter_relevant(max_files = 300) + +#---- GDS ----# +p <- file.path( + "gds://production/analysis_data/SBJ04651/tso_ctdna_tumor_only", + "20240223d1951163/L2400183/Results" +) + +outdir <- file.path(sub("gds:/", "~/icav1/g", p)) +token <- Sys.getenv("ICA_ACCESS_TOKEN") +t2 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) +t2$list_files(max_files = 100) +t2$list_files_filter_relevant(max_files = 100) +d <- t2$download_files( + outdir = outdir, ica_token = token, + max_files = 100, dryrun = F +) +d_tidy <- t2$tidy_files(d) +d_write <- t2$write( + d_tidy, + outdir = file.path(p, "dracarys_tidy"), + prefix = prefix, + format = "tsv" ) -sample_id <- "PTC_ctTSO240429" -library_id <- "L2400482" -d <- TsoCombinedVariantOutputFile$new(x) -d$read() } } +\section{Super class}{ +\code{\link[dracarys:Wf]{dracarys::Wf}} -> \code{Wf_tso_ctdna_tumor_only} +} \section{Public fields}{ \if{html}{\out{
}} \describe{ -\item{\code{sid}}{SampleID.} +\item{\code{SampleID}}{The SampleID of the tumor sample (needed for path lookup).} -\item{\code{lid}}{LibraryID.} +\item{\code{LibraryID}}{The LibraryID of the tumor sample (needed for path lookup).} } \if{html}{\out{
}} } \section{Methods}{ \subsection{Public methods}{ \itemize{ +\item \href{#method-Wf_tso_ctdna_tumor_only-new}{\code{Wf_tso_ctdna_tumor_only$new()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-print}{\code{Wf_tso_ctdna_tumor_only$print()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_sar}{\code{Wf_tso_ctdna_tumor_only$read_sar()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_tmbt}{\code{Wf_tso_ctdna_tumor_only$read_tmbt()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_acfc}{\code{Wf_tso_ctdna_tumor_only$read_acfc()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_msv}{\code{Wf_tso_ctdna_tumor_only$read_msv()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_msvg}{\code{Wf_tso_ctdna_tumor_only$read_msvg()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_cvo}{\code{Wf_tso_ctdna_tumor_only$read_cvo()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_cnv}{\code{Wf_tso_ctdna_tumor_only$read_cnv()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_flh}{\code{Wf_tso_ctdna_tumor_only$read_flh()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_trc}{\code{Wf_tso_ctdna_tumor_only$read_trc()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_tmb}{\code{Wf_tso_ctdna_tumor_only$read_tmb()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_msi}{\code{Wf_tso_ctdna_tumor_only$read_msi()}} +\item \href{#method-Wf_tso_ctdna_tumor_only-read_fus}{\code{Wf_tso_ctdna_tumor_only$read_fus()}} \item \href{#method-Wf_tso_ctdna_tumor_only-clone}{\code{Wf_tso_ctdna_tumor_only$clone()}} } } +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-new}{}}} +\subsection{Method \code{new()}}{ +Create a new Wf_tso_ctdna_tumor_only object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$new(path = NULL, SampleID = NULL, LibraryID = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{Path to directory with raw workflow results (from GDS, S3, or +local filesystem).} + +\item{\code{SampleID}}{The SampleID of the tumor sample (needed for path lookup).} + +\item{\code{LibraryID}}{The LibraryID of the sample (needed for path lookup).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-print}{}}} +\subsection{Method \code{print()}}{ +Print details about the Workflow. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_sar}{}}} +\subsection{Method \code{read_sar()}}{ +Read \code{SampleAnalysisResults.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_sar(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_tmbt}{}}} +\subsection{Method \code{read_tmbt()}}{ +Read \code{TMB_Trace.tsv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_tmbt(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_acfc}{}}} +\subsection{Method \code{read_acfc()}}{ +Read \code{AlignCollapseFusionCaller_metrics.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_acfc(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_msv}{}}} +\subsection{Method \code{read_msv()}}{ +Read \code{MergedSmallVariants.vcf.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_msv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_msvg}{}}} +\subsection{Method \code{read_msvg()}}{ +Read \code{MergedSmallVariants.genome.vcf.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_msvg(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_cvo}{}}} +\subsection{Method \code{read_cvo()}}{ +Read \code{CombinedVariantOutput.tsv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_cvo(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_cnv}{}}} +\subsection{Method \code{read_cnv()}}{ +Read \code{CopyNumberVariants.vcf.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_cnv(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_flh}{}}} +\subsection{Method \code{read_flh()}}{ +Read \code{fragment_length_hist.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_flh(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_trc}{}}} +\subsection{Method \code{read_trc()}}{ +Read \code{TargetRegionCoverage.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_trc(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_tmb}{}}} +\subsection{Method \code{read_tmb()}}{ +Read \code{tmb.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_tmb(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_msi}{}}} +\subsection{Method \code{read_msi()}}{ +Read \code{msi.json.gz} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_msi(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-read_fus}{}}} +\subsection{Method \code{read_fus()}}{ +Read \code{Fusions.csv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only$read_fus(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only-clone}{}}} From 6cd5a7c9858671e0fccc4047115c04d6632f90a4 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Fri, 20 Sep 2024 15:47:52 +1000 Subject: [PATCH 4/8] tso: output tidy data in list col --- R/tso.R | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/R/tso.R b/R/tso.R index df12e71..883e820 100644 --- a/R/tso.R +++ b/R/tso.R @@ -17,6 +17,8 @@ #' t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) #' t1$list_files(max_files = 20) #' t1$list_files_filter_relevant(max_files = 300) +#' d <- t1$download_files(max_files = 100, dryrun = F) +#' d_tidy <- t1$tidy_files(d) #' #' #---- GDS ----# #' p <- file.path( @@ -65,8 +67,8 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( glue("{pref}/{pref}.AlignCollapseFusionCaller_metrics\\.json\\.gz$"), "acfc", glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz$"), "msv", glue("{pref}/{pref}_MergedSmallVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", - glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz$"), "DOWNLOAD_ONLY", - glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", + # glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz$"), "DOWNLOAD_ONLY", + # glue("{pref}/{pref}_MergedSmallVariants\\.genome\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", glue("{pref}/{pref}_CombinedVariantOutput\\.tsv$"), "cvo", glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz$"), "cnv", glue("{pref}/{pref}_CopyNumberVariants\\.vcf\\.gz\\.tbi$"), "DOWNLOAD_ONLY", @@ -99,65 +101,91 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( print(res) invisible(self) }, + #' @description Tidy given files. + #' @param x Tibble with `localpath` to file and the function `type` to parse it. 
+ tidy_files = function(x) { + assertthat::assert_that(is.data.frame(x)) + assertthat::assert_that(all(c("type", "localpath") %in% colnames(x))) + d1 <- x |> + dplyr::filter(.data$type != "DOWNLOAD_ONLY") |> + dplyr::rowwise() |> + dplyr::mutate( + data = list(dr_func_eval(f = .data$type, v = .data$type, envir = self)(.data$localpath)) + ) |> + dplyr::ungroup() + d1 + }, #' @description Read `SampleAnalysisResults.json.gz` file. #' @param x Path to file. read_sar = function(x) { - TsoSampleAnalysisResultsFile$new(x)$read() + TsoSampleAnalysisResultsFile$new(x)$read() |> + tibble::enframe(name = "name", value = "data") }, #' @description Read `TMB_Trace.tsv` file. #' @param x Path to file. read_tmbt = function(x) { - TsoTmbTraceTsvFile$new(x)$read() + dat <- TsoTmbTraceTsvFile$new(x)$read() + tibble::tibble(name = "tmb_trace", data = list(dat)) }, #' @description Read `AlignCollapseFusionCaller_metrics.json.gz` file. #' @param x Path to file. read_acfc = function(x) { - TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() + TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() |> + tibble::enframe(name = "name", value = "data") }, #' @description Read `MergedSmallVariants.vcf.gz` file. #' @param x Path to file. read_msv = function(x) { - TsoMergedSmallVariantsVcfFile$new(x)$read() + dat <- TsoMergedSmallVariantsVcfFile$new(x)$read() + tibble::tibble(name = "merged_smallv", data = list(dat)) }, #' @description Read `MergedSmallVariants.genome.vcf.gz` file. #' @param x Path to file. read_msvg = function(x) { - TsoMergedSmallVariantsGenomeVcfFile$new(x)$read() + dat <- TsoMergedSmallVariantsGenomeVcfFile$new(x)$read() + tibble::tibble(name = "merged_smallvg", data = list(dat)) }, #' @description Read `CombinedVariantOutput.tsv` file. #' @param x Path to file. 
read_cvo = function(x) { - TsoCombinedVariantOutputFile$new(x)$read() + dat <- TsoCombinedVariantOutputFile$new(x)$read() + tibble::tibble(name = "combined_var", data = list(dat)) }, #' @description Read `CopyNumberVariants.vcf.gz` file. #' @param x Path to file. read_cnv = function(x) { - TsoCopyNumberVariantsVcfFile$new(x)$read() + dat <- TsoCopyNumberVariantsVcfFile$new(x)$read() + tibble::tibble(name = "cnv", data = list(dat)) }, #' @description Read `fragment_length_hist.json.gz` file. #' @param x Path to file. read_flh = function(x) { - TsoFragmentLengthHistFile$new(x)$read() + dat <- TsoFragmentLengthHistFile$new(x)$read() + tibble::tibble(name = "fraglenhist", data = list(dat)) }, #' @description Read `TargetRegionCoverage.json.gz` file. #' @param x Path to file. read_trc = function(x) { - TsoTargetRegionCoverageFile$new(x)$read() + dat <- TsoTargetRegionCoverageFile$new(x)$read() + tibble::tibble(name = "targetcvg", data = list(dat)) }, #' @description Read `tmb.json.gz` file. #' @param x Path to file. read_tmb = function(x) { - TsoTmbFile$new(x)$read() + dat <- TsoTmbFile$new(x)$read() + tibble::tibble(name = "tmb", data = list(dat)) }, #' @description Read `msi.json.gz` file. #' @param x Path to file. read_msi = function(x) { - TsoMsiFile$new(x)$read() + dat <- TsoMsiFile$new(x)$read() + tibble::tibble(name = "msi", data = list(dat)) }, #' @description Read `Fusions.csv` file. #' @param x Path to file. 
read_fus = function(x) { - TsoFusionsCsvFile$new(x)$read() + dat <- TsoFusionsCsvFile$new(x)$read() + tibble::tibble(name = "fusions", data = list(dat)) } ) # end public ) From 185c7aa3b8ba63cbc7603b641d2db8cc1d4471b3 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sat, 21 Sep 2024 10:06:42 +1000 Subject: [PATCH 5/8] remove spec_tbl_df subclass by [] --- R/tso.R | 8 +++++--- R/utils.R | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/tso.R b/R/tso.R index 883e820..6d2b1bd 100644 --- a/R/tso.R +++ b/R/tso.R @@ -228,7 +228,7 @@ TsoCombinedVariantOutputFile <- R6::R6Class( if (length(smallv) == 0 || ln[(smallv + 2)] == "NA\t\t") { return(empty_tbl(names(nm_map))) } - ln[(smallv + 1):length(ln)] |> + d <- ln[(smallv + 1):length(ln)] |> I() |> # read parsed data as-is readr::read_tsv( col_names = TRUE, col_types = readr::cols( @@ -239,6 +239,7 @@ TsoCombinedVariantOutputFile <- R6::R6Class( ) ) |> dplyr::rename(dplyr::any_of(nm_map)) + d[] }, #' @description #' Writes a tidy version of the `CombinedVariantOutput.tsv` (only Small Variants) @@ -429,7 +430,8 @@ TsoTmbTraceTsvFile <- R6::R6Class( GermlineFilterDatabase = "l", GermlineFilterProxi = "l", CodingVariant = "l", Nonsynonymous = "l", IncludedInTMBNumerator = "l" ) - readr::read_tsv(x, col_types = ct) + d <- readr::read_tsv(x, col_types = ct) + d[] }, #' @description @@ -731,7 +733,7 @@ TsoFusionsCsvFile <- R6::R6Class( if (nrow(res) == 0) { return(empty_tbl(cnames = names(ct))) } - return(res) + return(res[]) }, #' @description diff --git a/R/utils.R b/R/utils.R index 378588b..8691826 100644 --- a/R/utils.R +++ b/R/utils.R @@ -157,7 +157,8 @@ write_dracarys_list_of_tbls <- function(list_of_tbls, out_dir = NULL, prefix = N #' @return A tibble with 0 rows and the given column names. 
#' @export empty_tbl <- function(cnames, ctypes = readr::cols(.default = "c")) { - readr::read_csv("\n", col_names = cnames, col_types = ctypes) + d <- readr::read_csv("\n", col_names = cnames, col_types = ctypes) + d[] } read_tsvgz <- function(x, ...) { From cb0bfbed58658780d063700c1fdc46c0254f6e8f Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sat, 21 Sep 2024 11:45:09 +1000 Subject: [PATCH 6/8] tso: make table names more consistent --- R/tso.R | 18 +++++++++--------- R/tso_acfc.R | 29 +++++++++++++++++++---------- R/tso_sar.R | 18 +++++++++--------- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/R/tso.R b/R/tso.R index 6d2b1bd..5dc6d14 100644 --- a/R/tso.R +++ b/R/tso.R @@ -113,43 +113,43 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( data = list(dr_func_eval(f = .data$type, v = .data$type, envir = self)(.data$localpath)) ) |> dplyr::ungroup() - d1 + d1 |> + dplyr::select("data") |> + tidyr::unnest("data") }, #' @description Read `SampleAnalysisResults.json.gz` file. #' @param x Path to file. read_sar = function(x) { - TsoSampleAnalysisResultsFile$new(x)$read() |> - tibble::enframe(name = "name", value = "data") + TsoSampleAnalysisResultsFile$new(x)$read() }, #' @description Read `TMB_Trace.tsv` file. #' @param x Path to file. read_tmbt = function(x) { dat <- TsoTmbTraceTsvFile$new(x)$read() - tibble::tibble(name = "tmb_trace", data = list(dat)) + tibble::tibble(name = "tmbtrace", data = list(dat)) }, #' @description Read `AlignCollapseFusionCaller_metrics.json.gz` file. #' @param x Path to file. read_acfc = function(x) { - TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() |> - tibble::enframe(name = "name", value = "data") + TsoAlignCollapseFusionCallerMetricsFile$new(x)$read() }, #' @description Read `MergedSmallVariants.vcf.gz` file. #' @param x Path to file. 
read_msv = function(x) { dat <- TsoMergedSmallVariantsVcfFile$new(x)$read() - tibble::tibble(name = "merged_smallv", data = list(dat)) + tibble::tibble(name = "mergedsmallv", data = list(dat)) }, #' @description Read `MergedSmallVariants.genome.vcf.gz` file. #' @param x Path to file. read_msvg = function(x) { dat <- TsoMergedSmallVariantsGenomeVcfFile$new(x)$read() - tibble::tibble(name = "merged_smallvg", data = list(dat)) + tibble::tibble(name = "mergedsmallvg", data = list(dat)) }, #' @description Read `CombinedVariantOutput.tsv` file. #' @param x Path to file. read_cvo = function(x) { dat <- TsoCombinedVariantOutputFile$new(x)$read() - tibble::tibble(name = "combined_var", data = list(dat)) + tibble::tibble(name = "combinedvaro", data = list(dat)) }, #' @description Read `CopyNumberVariants.vcf.gz` file. #' @param x Path to file. diff --git a/R/tso_acfc.R b/R/tso_acfc.R index 5f726b5..7db5939 100644 --- a/R/tso_acfc.R +++ b/R/tso_acfc.R @@ -44,6 +44,12 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( s1 = c("MappingAligningPerRg", "MappingAligningSummary", "TrimmerStatistics", "CoverageSummary"), s2 = c("UmiStatistics", "SvSummary", "RunTime") ) + s1_new <- c( + "MappingAligningPerRg" = "acfc_maprg", + "MappingAligningSummary" = "acfc_map", + "TrimmerStatistics" = "acfc_trim", + "CoverageSummary" = "acfc_cvg" + ) secs2 <- unlist(secs, use.names = FALSE) secs_in_list <- secs2[secs2 %in% names(j)] # just extract the following elements if they exist @@ -54,9 +60,10 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( # Pivot all metrics for easier ingestion, # and utilise the multiqc parser to rename dirty columns. # Keeping each list section separate for flexibility. 
- for (sec in secs$s1) { + for (sec in secs[["s1"]]) { if (sec %in% names(d)) { - d[[sec]] <- d[[sec]] |> + new_nm <- s1_new[sec] + d[[new_nm]] <- d[[sec]] |> tidyr::pivot_longer(cols = c("value", "percent"), names_to = "name1", values_to = "value1") |> dplyr::filter(!is.na(.data$value1)) |> dplyr::mutate( @@ -68,11 +75,12 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( dplyr::mutate(umccr_workflow = "dragen_ctdna") |> multiqc_rename_cols() |> dplyr::select(-"umccr_workflow") + d[[sec]] <- NULL } } if ("UmiStatistics" %in% names(d)) { # handle non-hist data - d[["UmiStatisticsMain"]] <- d[["UmiStatistics"]] |> + d[["acfc_umistats"]] <- d[["UmiStatistics"]] |> dplyr::filter(!grepl("Hist", .data$name)) |> tidyr::pivot_longer(cols = c("value", "percent"), names_to = "name1", values_to = "value1") |> dplyr::filter(!is.na(.data$value1)) |> @@ -86,7 +94,7 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( multiqc_rename_cols() |> dplyr::select(-"umccr_workflow") # handle hist data - d[["UmiStatisticsHist"]] <- d[["UmiStatistics"]] |> + d[["acfc_umistatshist"]] <- d[["UmiStatistics"]] |> dplyr::filter(grepl("Hist", .data$name)) |> dplyr::mutate( name = sub("Histogram of ", "", .data$name), @@ -101,26 +109,27 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( d[["UmiStatistics"]] <- NULL } if ("SvSummary" %in% names(d)) { - d[["SvSummary"]] <- d[["SvSummary"]] |> + d[["acfc_svsum"]] <- d[["SvSummary"]] |> dplyr::mutate( name = sub("Number of (.*) \\(PASS\\)", "\\1", .data$name), name = sub("breakend pairs", "bnd_pairs", .data$name), value = as.numeric(.data$value) ) |> tidyr::pivot_wider(names_from = "name", values_from = "value") + d[["SvSummary"]] <- NULL } if ("RunTime" %in% names(d)) { # just keep the 'percent' column (number of seconds) - d[["RunTime"]] <- d[["RunTime"]] |> + d[["acfc_runtime"]] <- d[["RunTime"]] |> dplyr::mutate( seconds = as.numeric(.data$percent), name = tools::toTitleCase(sub("Time ", "", .data$name)) ) |> 
dplyr::select("name", "seconds") |> tidyr::pivot_wider(names_from = "name", values_from = "seconds") + d[["RunTime"]] <- NULL } - # keep as list - d + tibble::enframe(d, name = "name", value = "data") }, #' @description @@ -167,7 +176,7 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( #' @param max_num Maximum number to display in both plots. #' @return Both histogram plot objects. plot = function(d, max_num = 15) { - if (is.null(d[["UmiStatisticsHist"]])) { + if (is.null(d[["acfc_umistatshist"]])) { return( list( p_num_supporting_fragments = NULL, @@ -175,7 +184,7 @@ TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( ) ) } - h <- d[["UmiStatisticsHist"]] + h <- d[["acfc_umistatshist"]] # 15 seems like a good cutoff for both plots p1 <- h |> dplyr::filter( diff --git a/R/tso_sar.R b/R/tso_sar.R index 511b1f8..dba04c6 100644 --- a/R/tso_sar.R +++ b/R/tso_sar.R @@ -123,15 +123,15 @@ TsoSampleAnalysisResultsFile <- R6::R6Class( empty_tbl2() } - res <- list( - sampleinfo = sampleinfo, - qc = qc, - swconfds = sw[["data_sources"]], - swconfother = sw[["other"]], - snv = snvs, - cnv = cnvs - ) - res + list( + sar_sampleinfo = sampleinfo, + sar_qc = qc, + sar_swconfds = sw[["data_sources"]], + sar_swconfother = sw[["other"]], + sar_snv = snvs, + sar_cnv = cnvs + ) |> + tibble::enframe(name = "name", value = "data") }, #' @description #' Writes a tidy version of the `SampleAnalysisResults.json.gz` file output From bb854c2124cb699fe8b06b380b0da97773ae8668 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sat, 21 Sep 2024 11:51:52 +1000 Subject: [PATCH 7/8] tidy_files refactor --- R/Wf.R | 2 +- R/tidy.R | 19 +++++++++++++++---- R/tso.R | 16 ---------------- R/tso_acfc.R | 2 +- R/tso_sar.R | 2 +- ...TsoAlignCollapseFusionCallerMetricsFile.Rd | 2 +- man/TsoSampleAnalysisResultsFile.Rd | 2 +- man/Wf.Rd | 2 +- man/Wf_tso_ctdna_tumor_only.Rd | 2 ++ man/tidy_files.Rd | 16 +++++++++++----- 10 files changed, 34 insertions(+), 31 deletions(-) diff --git a/R/Wf.R b/R/Wf.R 
index 3e00027..6863046 100644 --- a/R/Wf.R +++ b/R/Wf.R @@ -200,7 +200,7 @@ Wf <- R6::R6Class( tidy_files(x, envir = self) }, #' @description Write tidy data. - #' @param x Tibble with tidy `data` and file `type`. + #' @param x Tibble with tidy `data` list-column. #' @param outdir Directory path to output tidy files. #' @param prefix Prefix of output files. #' @param format Format of output files. diff --git a/R/tidy.R b/R/tidy.R index 924620d..a8c4780 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -1,6 +1,10 @@ #' Tidy Files #' +#' Tidies files into a tibble with parsed data. +#' #' @param x Tibble with `localpath` to file and the function `type` to parse it. +#' The function must return a tibble with a `name` column and the tidied `data` +#' as a list-column (see example). #' @param envir the environment in which to evaluate the function e.g. use `self` #' when using inside R6 classes. #' @@ -8,9 +12,13 @@ #' @examples #' \dontrun{ #' p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043" -#' p2 <- "L2101566__L2101565/SBJ01155__PRJ211091-qc_summary.tsv.gz" -#' p <- file.path(p1, p2) -#' x <- tibble::tibble(type = "readr::read_tsv", localpath = p) +#' p2 <- "L2101566__L2101565/SBJ01155__PRJ211091/cancer_report_tables" +#' p <- file.path(p1, p2, "SBJ01155__PRJ211091-qc_summary.tsv.gz") +#' fun <- function(x) { +#' d <- readr::read_tsv(x) +#' tibble::tibble(name = "table1", data = list(d[])) +#' } +#' x <- tibble::tibble(type = "fun", localpath = p) #' tidy_files(x) #' } #' @@ -23,7 +31,10 @@ tidy_files <- function(x, envir = parent.frame()) { dplyr::rowwise() |> dplyr::mutate( data = list(dr_func_eval(f = .data$type, v = .data$type, envir = envir)(.data$localpath)) - ) + ) |> + dplyr::ungroup() |> + dplyr::select("data") |> + tidyr::unnest("data") } #' Tidy UMCCR Results diff --git a/R/tso.R b/R/tso.R index 5dc6d14..65fb371 100644 --- a/R/tso.R +++ b/R/tso.R @@ -101,22 +101,6 @@ Wf_tso_ctdna_tumor_only <- R6::R6Class( print(res) invisible(self) }, - 
#' @description Tidy given files. - #' @param x Tibble with `localpath` to file and the function `type` to parse it. - tidy_files = function(x) { - assertthat::assert_that(is.data.frame(x)) - assertthat::assert_that(all(c("type", "localpath") %in% colnames(x))) - d1 <- x |> - dplyr::filter(.data$type != "DOWNLOAD_ONLY") |> - dplyr::rowwise() |> - dplyr::mutate( - data = list(dr_func_eval(f = .data$type, v = .data$type, envir = self)(.data$localpath)) - ) |> - dplyr::ungroup() - d1 |> - dplyr::select("data") |> - tidyr::unnest("data") - }, #' @description Read `SampleAnalysisResults.json.gz` file. #' @param x Path to file. read_sar = function(x) { diff --git a/R/tso_acfc.R b/R/tso_acfc.R index 7db5939..ec56966 100644 --- a/R/tso_acfc.R +++ b/R/tso_acfc.R @@ -10,7 +10,7 @@ #' ) #' m <- TsoAlignCollapseFusionCallerMetricsFile$new(x) #' d_parsed <- m$read() # or read(m) -#' m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds")) +#' # m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds")) #' @export TsoAlignCollapseFusionCallerMetricsFile <- R6::R6Class( "TsoAlignCollapseFusionCallerMetricsFile", diff --git a/R/tso_sar.R b/R/tso_sar.R index dba04c6..d691ffa 100644 --- a/R/tso_sar.R +++ b/R/tso_sar.R @@ -8,7 +8,7 @@ #' x <- system.file("extdata/tso/sample705_SampleAnalysisResults.json.gz", package = "dracarys") #' res <- TsoSampleAnalysisResultsFile$new(x) #' d_parsed <- res$read() # or read(res) -#' res$write(d_parsed, tempfile(), "tsv") +#' # res$write(d_parsed, tempfile(), "tsv") #' @export TsoSampleAnalysisResultsFile <- R6::R6Class( "TsoSampleAnalysisResultsFile", diff --git a/man/TsoAlignCollapseFusionCallerMetricsFile.Rd b/man/TsoAlignCollapseFusionCallerMetricsFile.Rd index 20ea0d9..b450a8e 100644 --- a/man/TsoAlignCollapseFusionCallerMetricsFile.Rd +++ b/man/TsoAlignCollapseFusionCallerMetricsFile.Rd @@ -13,7 +13,7 @@ x <- 
system.file("extdata/tso/sample705.AlignCollapseFusionCaller_metrics.json.g ) m <- TsoAlignCollapseFusionCallerMetricsFile$new(x) d_parsed <- m$read() # or read(m) -m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds")) +#m$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = c("tsv", "rds")) } \section{Super class}{ \code{\link[dracarys:File]{dracarys::File}} -> \code{TsoAlignCollapseFusionCallerMetricsFile} diff --git a/man/TsoSampleAnalysisResultsFile.Rd b/man/TsoSampleAnalysisResultsFile.Rd index b051d8a..5886b18 100644 --- a/man/TsoSampleAnalysisResultsFile.Rd +++ b/man/TsoSampleAnalysisResultsFile.Rd @@ -11,7 +11,7 @@ Contains methods for reading and displaying contents of the x <- system.file("extdata/tso/sample705_SampleAnalysisResults.json.gz", package = "dracarys") res <- TsoSampleAnalysisResultsFile$new(x) d_parsed <- res$read() # or read(res) -res$write(d_parsed, tempfile(), "tsv") +#res$write(d_parsed, tempfile(), "tsv") } \section{Super class}{ \code{\link[dracarys:File]{dracarys::File}} -> \code{TsoSampleAnalysisResultsFile} diff --git a/man/Wf.Rd b/man/Wf.Rd index 3005725..b2c99f3 100644 --- a/man/Wf.Rd +++ b/man/Wf.Rd @@ -255,7 +255,7 @@ Write tidy data. \subsection{Arguments}{ \if{html}{\out{
<div class="arguments">}} \describe{ -\item{\code{x}}{Tibble with tidy \code{data} and file \code{type}.} +\item{\code{x}}{Tibble with tidy \code{data} list-column.} \item{\code{outdir}}{Directory path to output tidy files.} diff --git a/man/Wf_tso_ctdna_tumor_only.Rd b/man/Wf_tso_ctdna_tumor_only.Rd index 1c3a807..799b90e 100644 --- a/man/Wf_tso_ctdna_tumor_only.Rd +++ b/man/Wf_tso_ctdna_tumor_only.Rd @@ -20,6 +20,8 @@ prefix <- glue("{SampleID}__{LibraryID}") t1 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) t1$list_files(max_files = 20) t1$list_files_filter_relevant(max_files = 300) +d <- t1$download_files(max_files = 100, dryrun = F) +d_tidy <- t1$tidy_files(d) #---- GDS ----# p <- file.path( diff --git a/man/tidy_files.Rd b/man/tidy_files.Rd index 8d1849b..caff8d2 100644 --- a/man/tidy_files.Rd +++ b/man/tidy_files.Rd @@ -7,7 +7,9 @@ tidy_files(x, envir = parent.frame()) } \arguments{ -\item{x}{Tibble with \code{localpath} to file and the function \code{type} to parse it.} +\item{x}{Tibble with \code{localpath} to file and the function \code{type} to parse it. +The function must return a tibble with a \code{name} column and the tidied \code{data} +as a list-column (see example).} \item{envir}{the environment in which to evaluate the function e.g. use \code{self} when using inside R6 classes.} @@ -16,14 +18,18 @@ when using inside R6 classes.} Tibble with parsed data in a \code{data} list-column. } \description{ -Tidy Files +Tidies files into a tibble with parsed data. 
} \examples{ \dontrun{ p1 <- "~/icav1/g/production/analysis_data/SBJ01155/umccrise/202408300c218043" -p2 <- "L2101566__L2101565/SBJ01155__PRJ211091-qc_summary.tsv.gz" -p <- file.path(p1, p2) -x <- tibble::tibble(type = "readr::read_tsv", localpath = p) +p2 <- "L2101566__L2101565/SBJ01155__PRJ211091/cancer_report_tables" +p <- file.path(p1, p2, "SBJ01155__PRJ211091-qc_summary.tsv.gz") +fun <- function(x) { + d <- readr::read_tsv(x) + tibble::tibble(name = "table1", data = list(d[])) +} +x <- tibble::tibble(type = "fun", localpath = p) tidy_files(x) } From 108c07f64a4f0f8d9f12cbfdf13bf18be423a868 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sat, 21 Sep 2024 12:14:06 +1000 Subject: [PATCH 8/8] Wf: fix writer --- R/Wf.R | 5 ++--- R/tso.R | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/R/Wf.R b/R/Wf.R index 6863046..128aabf 100644 --- a/R/Wf.R +++ b/R/Wf.R @@ -213,12 +213,11 @@ Wf <- R6::R6Class( d_write <- x |> dplyr::rowwise() |> dplyr::mutate( - section = sub("read_", "", .data$type), - p = glue("{prefix}_{.data$section}"), + p = glue("{prefix}_{.data$name}"), out = list(write_dracarys(obj = .data$data, prefix = .data$p, out_format = format, drid = drid)) ) |> dplyr::ungroup() |> - dplyr::select("section", "data") |> + dplyr::select("name", "data") |> tibble::deframe() invisible(d_write) } diff --git a/R/tso.R b/R/tso.R index 65fb371..68dcb1e 100644 --- a/R/tso.R +++ b/R/tso.R @@ -19,13 +19,21 @@ #' t1$list_files_filter_relevant(max_files = 300) #' d <- t1$download_files(max_files = 100, dryrun = F) #' d_tidy <- t1$tidy_files(d) +#' d_write <- t1$write( +#' d_tidy, +#' outdir = file.path(p, "dracarys_tidy"), +#' prefix = prefix, +#' format = "tsv" +#' ) #' #' #---- GDS ----# #' p <- file.path( -#' "gds://production/analysis_data/SBJ04651/tso_ctdna_tumor_only", -#' "20240223d1951163/L2400183/Results" +#' "gds://production/analysis_data/SBJ05563/tso_ctdna_tumor_only", +#' "20240914d41300cd/L2401388/Results" #' ) -#' +#' SampleID 
<- "PRJ241446" +#' LibraryID <- "L2401388" +#' prefix <- glue("{SampleID}__{LibraryID}") #' outdir <- file.path(sub("gds:/", "~/icav1/g", p)) #' token <- Sys.getenv("ICA_ACCESS_TOKEN") #' t2 <- Wf_tso_ctdna_tumor_only$new(path = p, SampleID = SampleID, LibraryID = LibraryID) @@ -38,7 +46,7 @@ #' d_tidy <- t2$tidy_files(d) #' d_write <- t2$write( #' d_tidy, -#' outdir = file.path(p, "dracarys_tidy"), +#' outdir = file.path(outdir, "dracarys_tidy"), #' prefix = prefix, #' format = "tsv" #' )