Skip to content

Commit

Permalink
Merge pull request #152 from umccr/dev0312
Browse files: browse the repository at this point in the history
Initial ICAv1 cleanup
  • Loading branch information
pdiakumis authored Dec 6, 2024
2 parents 3416104 + fa3c2aa commit 209b20c
Show file tree
Hide file tree
Showing 43 changed files with 77 additions and 1,407 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
^setup\.cfg$
^setup\.py$
^vignettes$
inst/reports/wgts-qc/nogit
inst/rmd/umccr_portal/html
inst/rmd/umccr_workflows/alignment_qc/nogit
inst/rmd/umccr_workflows/bcl_convert/html
Expand Down
2 changes: 0 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ Imports:
ggrepel,
glue,
here,
httr,
jose,
jsonlite,
knitr,
lubridate,
Expand Down
9 changes: 0 additions & 9 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ export(bcftools_parse_vcf)
export(bcftools_parse_vcf_regions)
export(date_log)
export(dr_func_eval)
export(dr_gds_download)
export(dr_output_format_valid)
export(dr_s3_download)
export(dragen_cnv_metrics_read)
Expand All @@ -35,15 +34,7 @@ export(dtw_Wf_tso_ctdna_tumor_only)
export(dtw_Wf_tso_ctdna_tumor_only_v2)
export(empty_tbl)
export(file_regex_getter)
export(gds_file_download_api)
export(gds_file_download_cli)
export(gds_file_presignedurl)
export(gds_files_list_fastq)
export(gds_list_files_dir)
export(gds_list_files_filter_relevant)
export(gds_volumes_list)
export(grep_file)
export(ica_token_validate)
export(local_list_files_dir)
export(local_list_files_filter_relevant)
export(match_regex)
Expand Down
64 changes: 11 additions & 53 deletions R/Wf.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#'
#' A workflow has:
#'
#' - a directory path with all the raw output files (either on GDS, S3 or
#' - a directory path with all the raw output files (either on S3 or
#' local filesystem)
#' - a subset of files that are of interest for ingestion
#' - tibble with full path and basename columns
Expand Down Expand Up @@ -33,20 +33,6 @@
#' um1$list_files(max_files = 10)
#' um1$list_files_filter_relevant(max_files = 10)
#'
#' #---- GDS ----#
#' p1_gds <- "gds://production/analysis_data"
#' p <- file.path(p1_gds, "SBJ03043/umccrise/20240830ec648f40/L2300064__L2300063")
#' outdir <- file.path(sub("gds:/", "~/icav1/g", p))
#' token <- Sys.getenv("ICA_ACCESS_TOKEN")
#' um2 <- Wf$new(path = p, wname = "umccrise", regexes = regexes)
#' um2$list_files(max_files = 10)
#' um2$list_files_filter_relevant(ica_token = token, max_files = 500)
#' d <- um2$download_files(
#' outdir = outdir, ica_token = token,
#' max_files = 1000, dryrun = T
#' )
#' d_tidy <- um2$tidy_files(d)
#'
#' #---- S3 ----#
#' p1_s3 <- "s3://org.umccr.data.oncoanalyser/analysis_data/SBJ05570/sash/202408275fce06c3"
#' p2_s3 <- "L2401304_L2401303/SBJ05570_MDX240299/cancer_report/cancer_report_tables"
Expand Down Expand Up @@ -110,7 +96,6 @@ Wf <- R6::R6Class(
private$.path <- path
private$.wname <- wname
private$.filesystem <- dplyr::case_when(
grepl("^gds://", path) ~ "gds",
grepl("^s3://", path) ~ "s3",
.default = "local"
)
Expand All @@ -135,16 +120,9 @@ Wf <- R6::R6Class(
},
#' @description List all files under given path.
#' @param path Path with raw results.
#' @param max_files Max number of files to list (for gds/s3 only).
#' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
#' @param ... Passed on to `gds_list_files_dir` function.
list_files = function(path = private$.path, max_files = 1000,
ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) {
if (private$.filesystem == "gds") {
d <- gds_list_files_dir(
gdsdir = path, token = ica_token, page_size = max_files, ...
)
} else if (private$.filesystem == "s3") {
#' @param max_files Max number of files to list.
list_files = function(path = private$.path, max_files = 1000) {
if (private$.filesystem == "s3") {
d <- s3_list_files_dir(s3dir = path, max_objects = max_files)
} else {
d <- local_list_files_dir(localdir = path, max_files = max_files)
Expand All @@ -153,19 +131,12 @@ Wf <- R6::R6Class(
},
#' @description List dracarys files under given path
#' @param path Path with raw results.
#' @param max_files Max number of files to list (for gds/s3 only).
#' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
#' @param ... Passed on to the `gds_list_files_filter_relevant` or
#' the `s3_list_files_filter_relevant` function.
list_files_filter_relevant = function(path = private$.path, max_files = 1000,
ica_token = Sys.getenv("ICA_ACCESS_TOKEN"), ...) {
#' @param max_files Max number of files to list.
#' @param ... Passed on to `s3_list_files_filter_relevant`.
list_files_filter_relevant = function(path = private$.path, max_files = 1000, ...) {
regexes <- private$.regexes
assertthat::assert_that(!is.null(regexes))
if (private$.filesystem == "gds") {
d <- gds_list_files_filter_relevant(
gdsdir = path, regexes = regexes, token = ica_token, page_size = max_files, ...
)
} else if (private$.filesystem == "s3") {
if (private$.filesystem == "s3") {
d <- s3_list_files_filter_relevant(
s3dir = path, regexes = regexes, max_objects = max_files, ...
)
Expand All @@ -185,29 +156,16 @@ Wf <- R6::R6Class(
data = list(tibble::tibble(input_path = x))
)
},
#' @description Download files from GDS/S3 to local filesystem.
#' @description Download files from S3 to local filesystem.
#' @param path Path with raw results.
#' @param outdir Path to output directory.
#' @param ica_token ICA access token (def: $ICA_ACCESS_TOKEN env var).
#' @param max_files Max number of files to list.
#' @param dryrun If TRUE, just list the files that will be downloaded (don't
#' download them).
#' @param recursive Should files be returned recursively _in and under_ the specified
#' GDS directory, or _only directly in_ the specified GDS directory (def: TRUE via ICA API).
download_files = function(path = private$.path, outdir, ica_token = Sys.getenv("ICA_ACCESS_TOKEN"),
max_files = 1000, dryrun = FALSE, recursive = NULL) {
download_files = function(path = private$.path, outdir, max_files = 1000, dryrun = FALSE) {
regexes <- private$.regexes
assertthat::assert_that(!is.null(regexes))
if (private$.filesystem == "gds") {
d <- dr_gds_download(
gdsdir = path, outdir = outdir, regexes = regexes, token = ica_token,
page_size = max_files, dryrun = dryrun, recursive = recursive
)
if (!dryrun) {
private$.filesystem <- "local"
private$.path <- outdir
}
} else if (private$.filesystem == "s3") {
if (private$.filesystem == "s3") {
d <- dr_s3_download(
s3dir = path, outdir = outdir, regexes = regexes,
max_objects = max_files, dryrun = dryrun
Expand Down
18 changes: 0 additions & 18 deletions R/dragen.R
Original file line number Diff line number Diff line change
Expand Up @@ -902,24 +902,6 @@ dtw_Wf_dragen <- function(path, prefix, outdir,
#' prefix = prefix,
#' format = "tsv"
#' )
#' #---- GDS ----#
#' prefix <- "PRJ222358"
#' p <- file.path(
#' "gds://production/analysis_data/SBJ03001/wgs_tumor_normal",
#' "20241108fc293a38/L2201805_L2201797_dragen_somatic"
#' )
#' outdir <- file.path(sub("gds:/", normalizePath("~/icav1/g"), p)) # for GDS case
#' d1 <- Wf_dragen$new(path = p, prefix = prefix)
#' d1$list_files(max_files = 100)
#' d1$list_files_filter_relevant(max_files = 300)
#' d <- d1$download_files(max_files = 100, outdir = outdir, dryrun = F)
#' d_tidy <- d1$tidy_files(d)
#' d_write <- d1$write(
#' d_tidy,
#' outdir = file.path(p, "dracarys_tidy"),
#' prefix = prefix,
#' format = "tsv"
#' )
#' }
#' @export
Wf_dragen <- R6::R6Class(
Expand Down
Loading

0 comments on commit 209b20c

Please sign in to comment.