diff --git a/NAMESPACE b/NAMESPACE index a4a7519..4cf547e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ export(dr_gds_download) export(dr_output_format_valid) export(dr_s3_download) export(dtw_Wf_tso_ctdna_tumor_only) +export(dtw_Wf_tso_ctdna_tumor_only_v2) export(empty_tbl) export(file_regex_getter) export(gds_file_download_api) diff --git a/R/tsov2.R b/R/tsov2.R index 6f9d2a8..9a41204 100644 --- a/R/tsov2.R +++ b/R/tsov2.R @@ -11,8 +11,8 @@ #' "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", #' "analysis/cttsov2/20240915ff0295ed" #' ) -#' LibraryID <- "L2401290" -#' t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, LibraryID = LibraryID) +#' prefix <- "L2401290" +#' t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix) #' t1$list_files(max_files = 100) #' t1$list_files_filter_relevant(max_files = 300) #' d <- t1$download_files(max_files = 100, dryrun = F) @@ -20,7 +20,7 @@ #' d_write <- t1$write( #' d_tidy, #' outdir = file.path(p, "dracarys_tidy"), -#' prefix = LibraryID, +#' prefix = prefix, #' format = "tsv" #' ) #' @@ -29,9 +29,9 @@ #' "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", #' "analysis/cttsov2/20240915ff0295ed" #' ) -#' LibraryID <- "L2401290" +#' prefix <- "L2401290" #' outdir <- sub("s3:/", "~/s3", p) -#' t2 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, LibraryID = LibraryID) +#' t2 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix) #' t2$list_files(max_files = 500) #' t2$list_files_filter_relevant(max_files = 500) #' d <- t2$download_files( @@ -43,7 +43,7 @@ #' d_write <- t2$write( #' d_tidy, #' outdir = file.path(outdir, "dracarys_tidy"), -#' prefix = LibraryID, +#' prefix = prefix, #' format = "tsv" #' ) #' } @@ -52,15 +52,15 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( "Wf_tso_ctdna_tumor_only_v2", inherit = Wf, public = list( - #' @field LibraryID The LibraryID of the tumor sample (needed for path lookup). - LibraryID = NULL, + #' @field prefix The LibraryID prefix of the tumor sample (needed for path lookup). + prefix = NULL, #' @description Create a new Wf_tso_ctdna_tumor_only_v2 object. #' @param path Path to directory with raw workflow results (from S3 or #' local filesystem). - #' @param LibraryID The LibraryID of the sample (needed for path lookup). - initialize = function(path = NULL, LibraryID = NULL) { + #' @param prefix The LibraryID prefix of the tumor sample (needed for path lookup). + initialize = function(path = NULL, prefix = NULL) { wname <- "tso_ctdna_tumor_only_v2" - pref <- LibraryID + pref <- prefix res <- glue("Results/{pref}") li <- "Logs_Intermediates" dc <- glue("{li}/DragenCaller/{pref}") @@ -126,7 +126,7 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( ) super$initialize(path = path, wname = wname, regexes = regexes) - self$LibraryID <- LibraryID + self$prefix <- prefix }, #' @description Print details about the Workflow. #' @param ... (ignored). @@ -136,7 +136,7 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( "path", self$path, "wname", self$wname, "filesystem", self$filesystem, - "LibraryID", self$LibraryID + "prefix", self$prefix ) print(res) invisible(self) @@ -146,6 +146,18 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( read_sar = function(x) { tso_sar_read(x) }, + #' @description Read `TMB_Trace.tsv` file. + #' @param x Path to file. + read_tmbt = function(x) { + dat <- tso_tmbt_read(x) + tibble::tibble(name = "tmbtrace", data = list(dat)) + }, + #' @description Read `CombinedVariantOutput.tsv` file. + #' @param x Path to file. + read_cvo = function(x) { + dat <- tso_combinedvaro_smallv_read(x) + tibble::tibble(name = "combinedvaro", data = list(dat)) + }, #' @description Read `cnv.vcf` file. #' @param x Path to file. read_cnv = function(x) { @@ -187,18 +199,6 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( dat <- tso_msi_read(x) tibble::tibble(name = "msi", data = list(dat)) }, - #' @description Read `TMB_Trace.tsv` file. - #' @param x Path to file. - read_tmbt = function(x) { - dat <- tso_tmbt_read(x) - tibble::tibble(name = "tmbtrace", data = list(dat)) - }, - #' @description Read `CombinedVariantOutput.tsv` file. - #' @param x Path to file. - read_cvo = function(x) { - dat <- tso_combinedvaro_smallv_read(x) - tibble::tibble(name = "combinedvaro", data = list(dat)) - }, #' @description Read `Fusions.csv` file. #' @param x Path to file. read_fus = function(x) { @@ -207,3 +207,55 @@ Wf_tso_ctdna_tumor_only_v2 <- R6::R6Class( } ) # end public ) + +#' Wf_tso_ctdna_tumor_only_v2 Download Tidy and Write +#' +#' Downloads files from the `tso_ctdna_tumor_only_v2` workflow and writes them in a tidy format. +#' +#' @param path Path to directory with raw workflow results (S3 or +#' local filesystem). +#' @param prefix The LibraryID prefix of the sample (needed for path lookup). +#' @param outdir Path to output directory with raw files. +#' @param outdir_tidy Path to output directory with tidy files. +#' @param format Format of output files. +#' @param max_files Max number of files to list. +#' @param dryrun If TRUE, just list the files that will be downloaded (don't +#' download them). +#' @return Tibble of tidy tibbles. +#' +#' @examples +#' \dontrun{ +#' p <- file.path( +#' "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", +#' "analysis/cttsov2/20240915ff0295ed" +#' ) +#' prefix <- "L2401290" +#' outdir <- sub("s3:/", "~/s3", p) +#' d <- dtw_Wf_tso_ctdna_tumor_only_v2( +#' path = p, prefix = prefix, outdir = outdir, +#' format = "tsv", +#' dryrun = F +#' ) +#' } +#' @export +dtw_Wf_tso_ctdna_tumor_only_v2 <- function(path, prefix, outdir, + outdir_tidy = file.path(outdir, "dracarys_tidy"), + format = "rds", + max_files = 1000, + dryrun = FALSE) { + obj <- Wf_tso_ctdna_tumor_only_v2$new(path = path, prefix = prefix) + d_dl <- obj$download_files( + outdir = outdir, max_files = max_files, dryrun = dryrun + ) + if (!dryrun) { + d_tidy <- obj$tidy_files(d_dl) + d_write <- obj$write( + d_tidy, + outdir = outdir_tidy, + prefix = prefix, + format = format + ) + return(d_write) + } + return(d_dl) +} diff --git a/man/Wf_tso_ctdna_tumor_only_v2.Rd b/man/Wf_tso_ctdna_tumor_only_v2.Rd index 10bbf8e..55e998d 100644 --- a/man/Wf_tso_ctdna_tumor_only_v2.Rd +++ b/man/Wf_tso_ctdna_tumor_only_v2.Rd @@ -14,8 +14,8 @@ p <- file.path( "~/s3/pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", "analysis/cttsov2/20240915ff0295ed" ) -LibraryID <- "L2401290" -t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, LibraryID = LibraryID) +prefix <- "L2401290" +t1 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix) t1$list_files(max_files = 100) t1$list_files_filter_relevant(max_files = 300) d <- t1$download_files(max_files = 100, dryrun = F) @@ -23,7 +23,7 @@ d_tidy <- t1$tidy_files(d) d_write <- t1$write( d_tidy, outdir = file.path(p, "dracarys_tidy"), - prefix = LibraryID, + prefix = prefix, format = "tsv" ) @@ -32,9 +32,9 @@ p <- file.path( "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", "analysis/cttsov2/20240915ff0295ed" ) -LibraryID <- "L2401290" +prefix <- "L2401290" outdir <- sub("s3:/", "~/s3", p) -t2 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, LibraryID = LibraryID) +t2 <- Wf_tso_ctdna_tumor_only_v2$new(path = p, prefix = prefix) t2$list_files(max_files = 500) t2$list_files_filter_relevant(max_files = 500) d <- t2$download_files( @@ -46,7 +46,7 @@ d_tidy <- t2$tidy_files(d) d_write <- t2$write( d_tidy, outdir = file.path(outdir, "dracarys_tidy"), - prefix = LibraryID, + prefix = prefix, format = "tsv" ) } @@ -57,7 +57,7 @@ d_write <- t2$write( \section{Public fields}{ \if{html}{\out{
}} \describe{ -\item{\code{LibraryID}}{The LibraryID of the tumor sample (needed for path lookup).} +\item{\code{prefix}}{The LibraryID prefix of the tumor sample (needed for path lookup).} } \if{html}{\out{
}} } @@ -67,13 +67,13 @@ d_write <- t2$write( \item \href{#method-Wf_tso_ctdna_tumor_only_v2-new}{\code{Wf_tso_ctdna_tumor_only_v2$new()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-print}{\code{Wf_tso_ctdna_tumor_only_v2$print()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_sar}{\code{Wf_tso_ctdna_tumor_only_v2$read_sar()}} +\item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_tmbt}{\code{Wf_tso_ctdna_tumor_only_v2$read_tmbt()}} +\item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_cvo}{\code{Wf_tso_ctdna_tumor_only_v2$read_cvo()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_cnv}{\code{Wf_tso_ctdna_tumor_only_v2$read_cnv()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_cvgrepe}{\code{Wf_tso_ctdna_tumor_only_v2$read_cvgrepe()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_cvgrepg}{\code{Wf_tso_ctdna_tumor_only_v2$read_cvgrepg()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_hardfilt}{\code{Wf_tso_ctdna_tumor_only_v2$read_hardfilt()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_msi}{\code{Wf_tso_ctdna_tumor_only_v2$read_msi()}} -\item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_tmbt}{\code{Wf_tso_ctdna_tumor_only_v2$read_tmbt()}} -\item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_cvo}{\code{Wf_tso_ctdna_tumor_only_v2$read_cvo()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-read_fus}{\code{Wf_tso_ctdna_tumor_only_v2$read_fus()}} \item \href{#method-Wf_tso_ctdna_tumor_only_v2-clone}{\code{Wf_tso_ctdna_tumor_only_v2$clone()}} } @@ -95,7 +95,7 @@ d_write <- t2$write( \subsection{Method \code{new()}}{ Create a new Wf_tso_ctdna_tumor_only_v2 object. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$new(path = NULL, LibraryID = NULL)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$new(path = NULL, prefix = NULL)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -104,7 +104,7 @@ Create a new Wf_tso_ctdna_tumor_only_v2 object. \item{\code{path}}{Path to directory with raw workflow results (from S3 or local filesystem).} -\item{\code{LibraryID}}{The LibraryID of the sample (needed for path lookup).} +\item{\code{prefix}}{The LibraryID prefix of the tumor sample (needed for path lookup).} } \if{html}{\out{}} } @@ -135,6 +135,40 @@ Read \code{SampleAnalysisResults.json.gz} file. \if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$read_sar(x)}\if{html}{\out{
}} } +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only_v2-read_tmbt}{}}} +\subsection{Method \code{read_tmbt()}}{ +Read \code{TMB_Trace.tsv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$read_tmbt(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{Path to file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only_v2-read_cvo}{}}} +\subsection{Method \code{read_cvo()}}{ +Read \code{CombinedVariantOutput.tsv} file. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$read_cvo(x)}\if{html}{\out{
}} +} + \subsection{Arguments}{ \if{html}{\out{
}} \describe{ @@ -220,40 +254,6 @@ Read \code{microsat_output.json} file. \if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$read_msi(x)}\if{html}{\out{
}} } -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{Path to file.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only_v2-read_tmbt}{}}} -\subsection{Method \code{read_tmbt()}}{ -Read \code{TMB_Trace.tsv} file. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$read_tmbt(x)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{x}}{Path to file.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Wf_tso_ctdna_tumor_only_v2-read_cvo}{}}} -\subsection{Method \code{read_cvo()}}{ -Read \code{CombinedVariantOutput.tsv} file. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Wf_tso_ctdna_tumor_only_v2$read_cvo(x)}\if{html}{\out{
}} -} - \subsection{Arguments}{ \if{html}{\out{
}} \describe{ diff --git a/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd b/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd new file mode 100644 index 0000000..2a8ebf7 --- /dev/null +++ b/man/dtw_Wf_tso_ctdna_tumor_only_v2.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tsov2.R +\name{dtw_Wf_tso_ctdna_tumor_only_v2} +\alias{dtw_Wf_tso_ctdna_tumor_only_v2} +\title{Wf_tso_ctdna_tumor_only_v2 Download Tidy and Write} +\usage{ +dtw_Wf_tso_ctdna_tumor_only_v2( + path, + prefix, + outdir, + outdir_tidy = file.path(outdir, "dracarys_tidy"), + format = "rds", + max_files = 1000, + dryrun = FALSE +) +} +\arguments{ +\item{path}{Path to directory with raw workflow results (S3 or +local filesystem).} + +\item{prefix}{The LibraryID prefix of the sample (needed for path lookup).} + +\item{outdir}{Path to output directory with raw files.} + +\item{outdir_tidy}{Path to output directory with tidy files.} + +\item{format}{Format of output files.} + +\item{max_files}{Max number of files to list.} + +\item{dryrun}{If TRUE, just list the files that will be downloaded (don't +download them).} +} +\value{ +Tibble of tidy tibbles. +} +\description{ +Downloads files from the \code{tso_ctdna_tumor_only_v2} workflow and writes them in a tidy format. +} +\examples{ +\dontrun{ +p <- file.path( + "s3://pipeline-prod-cache-503977275616-ap-southeast-2/byob-icav2/production", + "analysis/cttsov2/20240915ff0295ed" +) +prefix <- "L2401290" +outdir <- sub("s3:/", "~/s3", p) +d <- dtw_Wf_tso_ctdna_tumor_only_v2( + path = p, prefix = prefix, outdir = outdir, + format = "tsv", + dryrun = F +) +} +}