Skip to content

Commit

Permalink
Merge pull request #162 from umccr/icav1_download_and_run
Browse files Browse the repository at this point in the history
update inst/scripts/icav1_download_and_run.R
  • Loading branch information
pdiakumis authored Aug 21, 2024
2 parents 08625ad + 75b35d0 commit d0f5937
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 40 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# R specific hooks: https://github.com/lorenzwalthert/precommit
repos:
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.3.2
rev: v0.4.3
hooks:
- id: style-files
args: [--style_pkg=styler, --style_fun=tidyverse_style]
Expand All @@ -14,7 +14,7 @@ repos:
- id: use-tidy-description
- id: readme-rmd-rendered
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.6.0
hooks:
- id: check-added-large-files
args: ['--maxkb=200']
Expand Down
59 changes: 21 additions & 38 deletions inst/scripts/icav1_download_and_run.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,20 @@
require(dracarys)
require(dplyr)
require(readr)
require(rportal, include.only = "portaldb_query_workflow")
require(glue, include.only = "glue")
require(here, include.only = "here")

# Grab rnasum workflow metadata from the portal workflow table on Athena.
#
# sbj: character vector of subject IDs (e.g. "SBJ04426"). Each ID becomes the
#      workflow run name prefix "rnasum__<sbj>"; all prefixes are OR-ed into a
#      single regular expression.
# Returns the result of rportal::portaldb_query_workflow() for that query.
athena_rnasum <- function(sbj) {
  # one alternation pattern, single-quoted by shQuote(), so a single query
  # covers every requested subject
  q_quote <- shQuote(paste(glue("rnasum__{sbj}"), collapse = "|"))
  query1 <- glue('WHERE REGEXP_LIKE("wfr_name", {q_quote});')
  # The lookup goes through rportal only: the hand-opened DBI connection (which
  # was never disconnected) and the extra SELECT whose tidied result was
  # discarded have been dropped.
  rportal::portaldb_query_workflow(query1)
}

# Grab LIMS rows for the given library ID(s) through rportal.
#
# libid: character vector of library IDs (e.g. "L2301428"). Several IDs are
#        OR-ed into one regular expression, mirroring athena_rnasum().
# Returns the result of rportal::portaldb_query_limsrow() for that query.
athena_lims <- function(libid) {
  # collapse first so glue() yields exactly one query string even when more
  # than one library ID is supplied (a single ID is left unchanged)
  lib_regex <- paste(libid, collapse = "|")
  query1 <- glue("WHERE REGEXP_LIKE(\"library_id\", '{lib_regex}');")
  rportal::portaldb_query_limsrow(query1)
}

# download gds files to a local structure reflecting the gds path starting from
Expand All @@ -42,39 +37,28 @@ rnasum_download <- function(gdsdir, outdir, token, page_size = 200, regexes) {
}

# SBJ IDs of interest
sbj1 <- c("SBJ04215", "SBJ04371", "SBJ04378", "SBJ04379")
sbj2 <- c("SBJ04388", "SBJ04391", "SBJ04387", "SBJ03190")
date1 <- "2023-11-09"
# grab glims
lims_rds <- here::here(glue("nogit/data_portal/lims/{date1}.rds"))
# lims_raw <- dracarys::glims_read()
# saveRDS(lims_raw, file = lims_rds)
lims_raw <- readr::read_rds(lims_rds)

pmeta_rds <- here::here(glue("nogit/data_portal/workflows/{date1}.rds"))
# pmeta_raw <- athena_rnasum(c(sbj1, sbj2))
# saveRDS(pmeta_raw, file = pmeta_rds)
pmeta_raw <- readr::read_rds(pmeta_rds)

sbj <- "SBJ04426"
lib <- "L2301428"
date1 <- "2024-08-20"
lims_raw <- athena_lims(lib)
pmeta_raw <- athena_rnasum(sbj) |>
rportal::meta_rnasum()
lims <- lims_raw |>
dplyr::select(
Timestamp, SubjectID, SampleID, SampleName, LibraryID, ExternalSubjectID, ExternalSampleID,
ProjectOwner, ProjectName, Type, Assay, Phenotype, Source, Quality, Topup, Workflow
)
dplyr::select(library_id, sample_id, subject_id)

# generate tidy rnasum metadata from portal workflows table, and join against glims
pmeta <- pmeta_raw |>
dplyr::left_join(lims, by = c("LibraryID", "SampleID", "SubjectID")) |>
dplyr::left_join(lims, by = c("LibraryID" = "library_id", "SampleID" = "sample_id", "SubjectID" = "subject_id")) |>
dplyr::select(
gds_indir_dragen, gds_indir_umccrise, gds_indir_arriba,
SubjectID, LibraryID, SampleID, Phenotype, rnasum_dataset,
SubjectID, LibraryID, SampleID,
rnasum_dataset,
end_status,
# ExternalSubjectID, ProjectOwner, ProjectName, Type, Assay, Source, Quality, Workflow,
wfr_id, start, end, gds_outfile_rnasum_html,
) |>
dplyr::arrange(desc(SubjectID), start) |>
# just keep a single rnasum dataset to get rid of dups
dplyr::filter(rnasum_dataset == "PANCAN")
dplyr::filter(rnasum_dataset == "BRCA") |>
dplyr::slice_head(n = 1)

# patterns of files to fish out
rnasum_file_regex <- tibble::tribble(
Expand Down Expand Up @@ -147,12 +131,11 @@ rnasum_params_set <- function(arriba_pdf, arriba_tsv, dataset, dragen_fusions, d
d_runs <- meta_rnasum |>
tidyr::unnest(down) |>
dplyr::select(SubjectID, LibraryID, rnasum_dataset, type, outfile) |>
dplyr::filter(SubjectID != "SBJ03190") |>
tidyr::pivot_wider(names_from = type, values_from = outfile)

# slice to whichever run you want from d_runs
d_runs |>
dplyr::slice(2) |>
dplyr::slice(1) |>
dplyr::rowwise() |>
dplyr::mutate(
params = list(
Expand Down

0 comments on commit d0f5937

Please sign in to comment.