diff --git a/inst/rmd/rnasum.Rmd b/inst/rmd/rnasum.Rmd index 87a0c9b3..89ec8642 100755 --- a/inst/rmd/rnasum.Rmd +++ b/inst/rmd/rnasum.Rmd @@ -3198,24 +3198,28 @@ Table summarising the **mRNA expression** values in cancer and patient samples f ##### Percentiles -```{r cn_expr_data_table_gains_perc, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk} +```{r cn_expr_data_table_gains_check, eval = runPurpleChunk} ##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours) ##### Keep only genes within CN gains cn_data <- ref_dataset.list[[dataset]][["expr_mut_cn_data"]] |> dplyr::filter(.data$CN >= cn_top) |> dplyr::select("Gene", "CN") genes_gains <- as.character(cn_genes[cn_genes %in% cn_data[["Gene"]]]) - +runPurpleGainsChunk <- TRUE ##### Deal with no genes if (length(genes_gains) == 0) { genes_gains <- NULL genes_gains_no <- 0 + runPurpleGainsChunk <- FALSE } else if (length(genes_gains) > params$top_genes) { genes_gains_no <- params$top_genes } else { genes_gains_no <- length(genes_gains) } +``` + +```{r cn_expr_data_table_gains_perc, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk & runPurpleGainsChunk} ##### Get expression data dat1 <- ref_dataset.list[[dataset]][["data_to_report"]] @@ -3292,7 +3296,7 @@ The [RED]{style="color:#ff0000"} colour range indicate relatively **high express ##### Z-scores -```{r cn_expr_data_table_gains, comment = NA, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 3, eval = runPurpleChunk} +```{r cn_expr_data_table_gains, comment = NA, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 3, eval = runPurpleChunk & runPurpleGainsChunk} ##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours) if (runPcgrChunk && runPurpleChunk) { cn_expr_genes.expr.gains.z <- RNAsum::exprTable( @@ -3373,24 +3377,28 @@ Table summarising the **mRNA expression** values in cancer and patient samples f ##### Percentiles -```{r cn_expr_data_table_losses_perc, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk} +```{r cn_expr_data_table_losses_check, eval = runPurpleChunk} ##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours) ##### Keep only genes within CN losses cn_data <- ref_dataset.list[[dataset]][["expr_mut_cn_data"]] |> dplyr::filter(.data$CN <= cn_bottom) |> dplyr::select("Gene", "CN") genes_losses <- as.character(cn_genes[cn_genes %in% cn_data[["Gene"]]]) - +runPurpleLossesChunk <- TRUE ##### Deal with no genes or when more than 10 genes are of interest if (length(genes_losses) == 0) { genes_losses <- NULL genes_losses_no <- 0 + runPurpleLossesChunk <- FALSE } else if (length(genes_losses) > params$top_genes) { genes_losses_no <- params$top_genes } else { genes_losses_no <- length(genes_losses) } +``` +```{r cn_expr_data_table_losses_perc, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk & runPurpleLossesChunk} +##### Deal with no genes or when more than 10 genes are of interest if (genes_gains_no + genes_losses_no > params$top_genes) { limit_genes <- TRUE } else { @@ -3473,7 +3481,7 @@ The [RED]{style="color:#ff0000"} colour range indicate relatively **high express ##### Z-scores -```{r cn_expr_data_table_losses, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk} +```{r cn_expr_data_table_losses, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk & runPurpleLossesChunk} ##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours) if (runPcgrChunk && runPurpleChunk) { cn_expr_genes.expr.losses.z <- RNAsum::exprTable( @@ -3555,7 +3563,7 @@ The [RED]{style="color:#ff0000"} colour range indicate relatively **high express #### Gains {.tabset} -```{r cdf_plot_cn_expr_gains, echo=FALSE, comment = NA, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 3, eval = runPurpleChunk, results="asis"} +```{r cdf_plot_cn_expr_gains, echo=FALSE, comment = NA, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 3, eval = runPurpleChunk & runPurpleGainsChunk, results="asis"} ##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level for the genes of interest in the context of the overall mRNA expression distribution output_cdf <- list() output_counts <- list() @@ -3642,7 +3650,7 @@ rm(list = ls(pattern = "^output*")) #### Losses {.tabset} -```{r cdf_plot_cn_expr_losses, echo=FALSE, comment = NA, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 3, eval = runPurpleChunk, results="asis"} +```{r cdf_plot_cn_expr_losses, echo=FALSE, comment = NA, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 3, eval = runPurpleChunk & runPurpleLossesChunk, results="asis"} ##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level for the genes of interest in the context of the overall mRNA expression distribution output_cdf <- list() output_counts <- list() @@ -4607,7 +4615,7 @@ cat(drugsTable_legend) `r if ( params$drugs ) { c("#### Gains") }` -```{r drugs_predictive_cn_altered_genes_gains, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk} +```{r drugs_predictive_cn_altered_genes_gains, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk & runPurpleGainsChunk} ##### Generate table with drugs targeting CN altered genes genes <- cn_expr_genes.expr.gains.perc[[2]]$SYMBOL @@ -4643,7 +4651,7 @@ cat(drugsTable_legend) `r if ( params$drugs ) { c("#### Losses") }` -```{r drugs_predictive_cn_altered_genes_losses, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk} +```{r drugs_predictive_cn_altered_genes_losses, comment = NA, message=FALSE, warning=FALSE, eval = runPurpleChunk & runPurpleLossesChunk} ##### Generate table with drugs targeting CN altered genes genes <- cn_expr_genes.expr.losses.perc[[2]]$SYMBOL diff --git a/inst/scripts/icav2_download_and_run.R b/inst/scripts/icav2_download_and_run.R new file mode 100644 index 00000000..e02e947c --- /dev/null +++ b/inst/scripts/icav2_download_and_run.R @@ -0,0 +1,133 @@ +# helpers for downloading S3 data for local testing +require(dracarys, include.only = "s3_list_files_dir") +require(dplyr) +require(rportal, include.only = "orca_query_url") +require(glue, include.only = "glue") +require(here, include.only = "here") +require(tibble, include.only = "tibble") +require(tidyr, include.only = "pivot_longer") + +# make sure you have logged into AWS +c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION") |> + rportal::envvar_defined() |> + stopifnot() +token_orca <- rportal::orca_jwt() |> rportal::jwt_validate() +portalRunId <- "20241201d2c4e79d" # failed run + +# query orca workflow manager for payload with rnasum s3 inputs +get_rnasum_inputs <- function(prid, token_orca) { + wfrid <- rportal::orca_prid2wfrid(prid = portalRunId, token = token) + states <- rportal::orca_wfrid2state(wfrid, token) # view states for gived wfrid + pldid <- states |> + filter(.data$status == "FAILED") |> + pull("payload") + url <- glue("https://workflow.prod.umccr.org/api/v1/payload/{pldid}") + pld <- rportal::orca_query_url(url, token) + s3inputs <- pld[["data"]][["inputs"]] |> + purrr::map(\(x) x |> stringr::str_replace("/$", "")) + return(s3inputs) +} + +rnasum_inputs <- get_rnasum_inputs(portalRunId, token) +# patterns of files to fish out +rnasum_file_regex <- tibble::tribble( + ~regex, ~fun, + "quant\\.genes\\.sf$", "DragenWtsQuantGenesSfFile", + "quant\\.sf$", "DragenWtsQuantSfFile", + "fusion_candidates\\.final$", "DragenWtsFusionsFinalFile", + "fusions\\.pdf$", "ArribaFusionsPdfFile", + "fusions\\.tsv$", "ArribaFusionsTsvFile", + "somatic\\.pcgr\\.snvs_indels\\.tiers\\.tsv$", "PcgrTiersTsvFile", + "purple\\.cnv\\.gene\\.tsv$", "PurpleCnvGeneTsvFile", + "manta\\.tsv$", "MantaTsvFile", + "mapping_metrics\\.csv$", "MapMetricsFile" +) +outdir <- here::here("nogit/patient_data") + +# melt s3_indir columns to get a single column with paths to S3 directories +# of interest, and fish out files of interest from each of them, then download +meta_rnasum <- rnasum_inputs |> + tibble::as_tibble_row() |> + tidyr::pivot_longer(contains("Uri"), names_to = "folder_type", values_to = "s3_indir") |> + mutate(folder_type = sub("Uri$", "", .data$folder_type)) |> + select(subjectId, wtsTumorLibraryId, folder_type, s3_indir) |> + rowwise() |> + mutate( + down = list(dracarys::dr_s3_download(s3dir = s3_indir, outdir = outdir, max_objects = 1000, regexes = rnasum_file_regex, dryrun = F)) + ) |> + ungroup() + +# saveRDS(meta_rnasum, here(glue("nogit/patient_data/down_{date1}.rds"))) +# meta_rnasum <- here::here(glue("nogit/patient_data/down_{date1}.rds")) |> readr::read_rds() +rnasum_params_set <- function(arriba_pdf, arriba_tsv, dataset, dragen_fusions, dragen_mapping_metrics, sv_tsv, + pcgr_tiers_tsv, purple_gene_tsv, report_dir, salmon, + sample_name, subject_id) { + params <- list( + arriba_pdf = arriba_pdf, + arriba_tsv = arriba_tsv, + batch_rm = TRUE, + cn_gain = 95, + cn_loss = 5, + dataset = dataset, + dataset_name_incl = FALSE, + dragen_fusions = dragen_fusions, + dragen_mapping_metrics = dragen_mapping_metrics, + drugs = FALSE, + filter = TRUE, + immunogram = FALSE, + log = TRUE, + sv_tsv = sv_tsv, + norm = "TMM", + pcgr_splice_vars = TRUE, + pcgr_tier = 4, + pcgr_tiers_tsv = pcgr_tiers_tsv, + project = "-", + purple_gene_tsv = purple_gene_tsv, + report_dir = report_dir, + salmon = salmon, + sample_name = sample_name, + sample_source = "-", + save_tables = TRUE, + scaling = "gene-wise", + subject_id = subject_id, + top_genes = 5, + transform = "CPM", + umccrise = NULL + ) + params +} + +# now unmelt to have one row per run +d_runs <- meta_rnasum |> + tidyr::unnest(down) |> + dplyr::select(subjectId, libraryId = "wtsTumorLibraryId", type, localpath) |> + tidyr::pivot_wider(names_from = type, values_from = localpath) + +# slice to whichever run you want from d +d_runs |> + # dplyr::slice(1) |> + dplyr::rowwise() |> + dplyr::mutate( + rnasum_dataset = "PANCAN", + params = list( + rnasum_params_set( + arriba_pdf = ArribaFusionsPdfFile, + arriba_tsv = ArribaFusionsTsvFile, + dataset = rnasum_dataset, + dragen_fusions = DragenWtsFusionsFinalFile, + dragen_mapping_metrics = MapMetricsFile, + sv_tsv = MantaTsvFile, + pcgr_tiers_tsv = PcgrTiersTsvFile, + purple_gene_tsv = PurpleCnvGeneTsvFile, + report_dir = here::here(glue::glue("nogit/patient_data/reports/{subjectId}_{libraryId}_{rnasum_dataset}")), + salmon = DragenWtsQuantGenesSfFile, + sample_name = glue::glue("{subjectId}_{libraryId}"), + subject_id = subjectId + ) + ), + rnasum_rmd = RNAsum::rnasum_rmd( + out_file = here::here(glue::glue("nogit/patient_data/reports_html/{subjectId}_{libraryId}_{rnasum_dataset}.html")), + quiet = FALSE, pars = params + ) + ) |> + dplyr::ungroup()