Skip to content

Commit

Permalink
Merge pull request #94 from umccr/um_hrd_sig
Browse files Browse the repository at this point in the history
umccrise: support presigned URL parsing
  • Loading branch information
pdiakumis authored Sep 5, 2023
2 parents 94c1aa1 + 65ee029 commit 4436c11
Show file tree
Hide file tree
Showing 15 changed files with 399 additions and 176 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ Imports:
ggplot2,
ggrepel,
glue,
googledrive,
googlesheets4,
here,
httr,
jose,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export(gds_file_presignedurl)
export(gds_files_list)
export(gds_files_list_filter_relevant)
export(gds_volumes_list)
export(glims_read)
export(ica_token_validate)
export(match_regex)
export(meta_bcl_convert)
Expand Down
4 changes: 2 additions & 2 deletions R/regex.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ DR_FILE_REGEX <- tibble::tribble(
"somatic\\.pcgr\\.snvs_indels\\.tiers\\.tsv$", "PcgrTiersFile",
"chord\\.tsv\\.gz$", "UmChordTsvFile",
"hrdetect\\.tsv\\.gz$", "UmHrdetectTsvFile",
"snv_2015\\.tsv\\.gz$", "UmSigsSnv2015File",
"snv_2020\\.tsv\\.gz$", "UmSigsSnv2020File",
"snv_2015\\.tsv\\.gz$", "UmSigsSnvFile",
"snv_2020\\.tsv\\.gz$", "UmSigsSnvFile",
"-qc_summary\\.tsv\\.gz$", "UmQcSumFile"
)

Expand Down
81 changes: 44 additions & 37 deletions R/umccrise.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' x <- "/path/to/chord.tsv.gz"
#' d <- UmChordTsvFile$new(x)
#' d_parsed <- d$read() # or read(d)
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "both")
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "tsv")
#' }
#' @export
UmChordTsvFile <- R6::R6Class(
Expand All @@ -29,7 +29,7 @@ UmChordTsvFile <- R6::R6Class(
p_BRCA1 = "d",
p_BRCA2 = "d"
)
readr::read_tsv(x, col_types = ct)
read_tsvgz(x, col_types = ct)
},

#' @description
Expand All @@ -38,12 +38,14 @@ UmChordTsvFile <- R6::R6Class(
#' @param d Parsed object from `self$read()`.
#' @param prefix Prefix of output file(s).
#' @param out_dir Output directory.
#' @param out_format Format of output file(s) (one of 'tsv' (def.),
#' 'parquet', 'both').
write = function(d, out_dir, prefix, out_format = "tsv") {
prefix <- file.path(out_dir, prefix)
prefix2 <- glue("{prefix}_chord")
write_dracarys(obj = d, prefix = prefix2, out_format = out_format)
#' @param out_format Format of output file(s).
#' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
  # Only prepend the directory when the caller supplied one; otherwise
  # `prefix` is forwarded untouched.
  has_out_dir <- !is.null(out_dir)
  if (has_out_dir) {
    prefix <- file.path(out_dir, prefix)
  }
  write_dracarys(
    obj = d,
    prefix = prefix,
    out_format = out_format,
    drid = drid
  )
}
)
)
Expand All @@ -59,7 +61,7 @@ UmChordTsvFile <- R6::R6Class(
#' x <- "/path/to/hrdetect.tsv.gz"
#' d <- UmHrdetectTsvFile$new(x)
#' d_parsed <- d$read() # or read(d)
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "both")
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "tsv")
#' }
#' @export
UmHrdetectTsvFile <- R6::R6Class(
Expand All @@ -76,7 +78,7 @@ UmHrdetectTsvFile <- R6::R6Class(
.default = "d",
sample = "c"
)
readr::read_tsv(x, col_types = ct) |>
read_tsvgz(x, col_types = ct) |>
dplyr::select(-c("sample"))
},

Expand All @@ -86,12 +88,14 @@ UmHrdetectTsvFile <- R6::R6Class(
#' @param d Parsed object from `self$read()`.
#' @param prefix Prefix of output file(s).
#' @param out_dir Output directory.
#' @param out_format Format of output file(s) (one of 'tsv' (def.),
#' 'parquet', 'both').
write = function(d, out_dir, prefix, out_format = "tsv") {
prefix <- file.path(out_dir, prefix)
prefix2 <- glue("{prefix}_hrdetect")
write_dracarys(obj = d, prefix = prefix2, out_format = out_format)
#' @param out_format Format of output file(s).
#' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
# `out_dir` defaults to NULL (as in `UmChordTsvFile$write`) so that the
# `is.null(out_dir)` guard below is reachable when the argument is omitted,
# instead of failing with "argument is missing".
write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
  # Prepend the output directory only when one was supplied.
  if (!is.null(out_dir)) {
    prefix <- file.path(out_dir, prefix)
  }
  write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
}
)
)
Expand All @@ -107,7 +111,7 @@ UmHrdetectTsvFile <- R6::R6Class(
#' x <- "/path/to/snv_2015.tsv.gz"
#' d <- UmSigsSnvFile$new(x)
#' d_parsed <- d$read() # or read(d)
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "both")
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "tsv")
#' }
#' @export
UmSigsSnvFile <- R6::R6Class(
Expand All @@ -125,10 +129,7 @@ UmSigsSnvFile <- R6::R6Class(
.default = "d",
Signature = "c"
)
list(
data = readr::read_tsv(x, col_types = ct),
version = version
)
read_tsvgz(x, col_types = ct)
},

#' @description
Expand All @@ -137,13 +138,14 @@ UmSigsSnvFile <- R6::R6Class(
#' @param d Parsed object from `self$read()`.
#' @param prefix Prefix of output file(s).
#' @param out_dir Output directory.
#' @param out_format Format of output file(s) (one of 'tsv' (def.),
#' 'parquet', 'both').
#' @param out_format Format of output file(s).
#' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
# `drid` is now a real parameter (the body previously forwarded an undefined
# `drid`), and the stale `d[["version"]]` / `d[["data"]]` write path is gone.
# `out_dir` defaults to NULL so the guard below is reachable when it is omitted.
write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
  # Prepend the output directory only when one was supplied.
  if (!is.null(out_dir)) {
    prefix <- file.path(out_dir, prefix)
  }
  write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
}
)
)
Expand All @@ -159,7 +161,7 @@ UmSigsSnvFile <- R6::R6Class(
#' x <- "/path/to/snv_2015.tsv.gz"
#' d <- UmQcSumFile$new(x)
#' d_parsed <- d$read() # or read(d)
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "both")
#' d$write(d_parsed, out_dir = tempdir(), prefix = "sample705", out_format = "tsv")
#' }
#' @export
UmQcSumFile <- R6::R6Class(
Expand All @@ -172,8 +174,8 @@ UmQcSumFile <- R6::R6Class(
#' @return A tibble.
read = function() {
x <- self$path
d <- readr::read_tsv(x, col_types = readr::cols(.default = "c"))
d <- d |>
d <- read_tsvgz(x, col_types = readr::cols(.default = "c"))
d |>
dplyr::select("variable", "value") |>
tidyr::pivot_wider(names_from = "variable", values_from = "value") |>
dplyr::rename(MSI_mb_tmp = "MSI (indels/Mb)") |>
Expand All @@ -189,7 +191,9 @@ UmQcSumFile <- R6::R6Class(
deleted_genes_hmf = as.numeric(.data$DeletedGenes),
msi_hmf = sub("(.*) \\(.*\\)", "\\1", .data$MSI_mb_tmp),
tmb_hmf = sub("(.*) \\(.*\\)", "\\1", .data$TMB) |> as.numeric(),
tml_hmf = sub("(.*) \\(.*\\)", "\\1", .data$TML) |> as.numeric()
tml_hmf = sub("(.*) \\(.*\\)", "\\1", .data$TML) |> as.numeric(),
hypermutated = ifelse("Hypermutated" %in% d$variable, .data[["Hypermutated"]], NA) |> as.character(),
bpi_enabled = ifelse("BPI Enabled" %in% d$variable, .data[["BPI Enabled"]], NA) |> as.character(),
) |>
dplyr::select(
qc_status_hmf = "QC_Status",
Expand All @@ -198,6 +202,7 @@ UmQcSumFile <- R6::R6Class(
"hrd_chord", "hrd_hrdetect", "contamination_hmf",
"deleted_genes_hmf", "tmb_hmf", "tml_hmf",
wgd_hmf = "WGD",
hypermutated, bpi_enabled
)
},

Expand All @@ -208,12 +213,14 @@ UmQcSumFile <- R6::R6Class(
#' @param d Parsed object from `self$read()`.
#' @param prefix Prefix of output file(s).
#' @param out_dir Output directory.
#' @param out_format Format of output file(s) (one of 'tsv' (def.),
#' 'parquet', 'both').
write = function(d, out_dir, prefix, out_format = "tsv") {
prefix <- file.path(out_dir, prefix)
prefix2 <- glue("{prefix}_qc_summary")
write_dracarys(obj = d, prefix = prefix2, out_format = out_format)
#' @param out_format Format of output file(s).
#' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`).
# `out_dir` defaults to NULL (as in `UmChordTsvFile$write`) so that the
# `is.null(out_dir)` guard below is reachable when the argument is omitted,
# instead of failing with "argument is missing".
write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) {
  # Prepend the output directory only when one was supplied.
  if (!is.null(out_dir)) {
    prefix <- file.path(out_dir, prefix)
  }
  write_dracarys(obj = d, prefix = prefix, out_format = out_format, drid = drid)
}
)
)
32 changes: 32 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,16 @@ empty_tbl <- function(cnames, ctypes = readr::cols(.default = "c")) {
readr::read_csv("\n", col_names = cnames, col_types = ctypes)
}

# Read a (possibly gzipped) TSV from a local path or a URL.
#
# `x` is a path or URL; `...` is forwarded to `readr::read_tsv()`.
# URLs are wrapped in a `gzcon()` connection before being handed to readr;
# anything else is passed to `readr::read_tsv()` directly.
read_tsvgz <- function(x, ...) {
  if (!is_url(x)) {
    return(readr::read_tsv(x, ...))
  }
  gz_con <- base::gzcon(base::url(x))
  readr::read_tsv(gz_con, ...)
}

read_jsongz_jsonlite <- function(x, ...) {
if (is_url(x)) {
# https://github.com/jeroen/jsonlite/issues/414
Expand All @@ -149,3 +159,25 @@ read_jsongz_rjsonio <- function(x, ...) {
}
RJSONIO::fromJSON(x, ...)
}

#' Read Google LIMS
#'
#' Looks up the spreadsheet named "Google LIMS" on the "LIMS" shared drive and
#' reads its `Sheet1` tab. Cells equal to `"."`, `""` or `"-"` are treated as
#' missing.
#'
#' @return Tibble with all columns and rows from the Google LIMS spreadsheet.
#' @export
glims_read <- function() {
  sheet_id <- googledrive::drive_find("^Google LIMS$", shared_drive = "LIMS")$id
  # Read everything as character first, then convert column types in one pass.
  raw <- googlesheets4::read_sheet(
    sheet_id, "Sheet1",
    na = c(".", "", "-"), col_types = "c"
  )
  readr::type_convert(
    raw,
    col_types = readr::cols(.default = "c", Timestamp = "T")
  )
}


#' @noRd
# Internal placeholder that is not meant to be called for its result.
# It only mentions one object from each of the scales, argparse and here
# namespaces; none of them is invoked.
dummy1 <- function() {
# Solves R CMD check: Namespaces in Imports field not imported from
# Each line below is a bare `pkg::name` reference (no call).
scales::pretty_breaks
argparse::ArgumentParser
# Last expression, so this is what the function would return if called.
here::here
}
4 changes: 4 additions & 0 deletions conda/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ requirements:
- r-ggplot2
- r-ggrepel
- r-glue
- r-googledrive
- r-googlesheets4
- r-here
- r-httr
- r-jose
Expand Down Expand Up @@ -59,6 +61,8 @@ requirements:
- r-ggplot2
- r-ggrepel
- r-glue
- r-googledrive
- r-googlesheets4
- r-here
- r-httr
- r-jose
Expand Down
9 changes: 1 addition & 8 deletions inst/rmd/umccr_portal/portal_summary.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,6 @@ kable_empty_wf <- function(wf) {
kableExtra::kable_minimal(full_width = TRUE, position = "left")
}
# Read the "Google LIMS" spreadsheet (tab `Sheet1`) from the "LIMS" shared
# drive. All cells are read as character, then column types are converted.
glims_read <- function() {
  sheet_id <- googledrive::drive_find("^Google LIMS$", shared_drive = "LIMS")$id
  raw <- googlesheets4::read_sheet(
    sheet_id, "Sheet1",
    na = c(".", "", "-"), col_types = "c"
  )
  readr::type_convert(
    raw,
    col_types = readr::cols(.default = "c", Timestamp = "T")
  )
}
dt_view <- function(x, ...) {
x |>
dplyr::mutate(across(where(is.character), as.factor)) |>
Expand Down Expand Up @@ -133,7 +126,7 @@ wf_order <- c(
)
lims_rds <- here(glue("nogit/data_portal/lims/{as.Date(date_end)}.rds"))
# lims_raw <- glims_read()
# lims_raw <- dracarys::glims_read()
# saveRDS(lims_raw, file = lims_rds)
lims_raw <- readr::read_rds(lims_rds)
pmeta_rds <- here(glue("nogit/data_portal/workflows/{as.Date(date_end)}.rds"))
Expand Down
4 changes: 2 additions & 2 deletions inst/rmd/umccr_workflows/bcl_convert/single.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ output:
rmdformats::material:
highlight: kate
params:
title: "UMCCR BCL Convert Report"
title: "UMCCR bcl_convert Report"
gds_outdir: "X"
description: "UMCCR BCL Convert Report"
description: "UMCCR bcl_convert Report"
title: "`r params$title`"
---

Expand Down
Loading

0 comments on commit 4436c11

Please sign in to comment.