diff --git a/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R b/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R index 82f9974..3b5ede3 100755 --- a/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R +++ b/inst/rmd/umccr_workflows/umccrise/dl_and_tidy.R @@ -44,7 +44,7 @@ lims <- lims_raw |> subject_id, library_id, sample_id, sample_name, external_subject_id, external_sample_id, project_name, project_owner, - source, quality + source, quality, workflow ) |> distinct() table(lims$library_id %in% meta$LibraryID_tumor) # double-check @@ -55,7 +55,7 @@ meta_lims <- meta |> select( rownum, wfr_id, version, end_status, start, end, portal_run_id, SubjectID, LibraryID_tumor, LibraryID_normal, SampleID_tumor, SampleID_normal, gds_outdir_umccrise, gds_indir_dragen_somatic, external_subject_id, external_sample_id, - project_owner, project_name, source, quality + project_owner, project_name, source, quality, workflow ) meta_lims |> saveRDS(here(glue("inst/rmd/umccr_workflows/umccrise/nogit/meta/{start_date}_{end_date}.rds"))) diff --git a/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd b/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd index 50b7329..0f9390e 100644 --- a/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd +++ b/inst/rmd/umccr_workflows/umccrise/summary_umccrise.qmd @@ -57,6 +57,48 @@ date_end <- "2024-09-01" d_raw <- readr::read_rds(here(glue("inst/rmd/umccr_workflows/umccrise/nogit/results_{date_start}_{date_end}.rds"))) ``` +```{r} +#| label: funcs +dt_view <- function(x, id, ...) { + htmltools::browsable( + htmltools::tagList( + htmltools::tags$button( + htmltools::tagList(fontawesome::fa("download"), "CSV"), + onclick = glue("Reactable.downloadDataCSV('{id}', '{id}.csv')") + ), + x |> + reactable::reactable( + bordered = TRUE, + filterable = TRUE, + fullWidth = TRUE, + height = 800, + highlight = TRUE, + pagination = FALSE, + resizable = TRUE, + searchable = TRUE, + sortable = TRUE, + striped = TRUE, + wrap = FALSE, + elementId = id, + ... + ) + ) + ) +} +``` + +## Metadata + +```{r} +#| label: metadata +meta <- d_raw |> + select( + rownum, portal_run_id, SubjectID, LibraryID_tumor, SampleID_tumor, external_subject_id, external_sample_id, + project_owner, project_name, source, quality, workflow + ) +dt_view(meta, id = "metadata") +``` + ## Results ```{r} @@ -66,10 +108,61 @@ d <- d_raw |> tidyr::unnest_longer(res, indices_to = "filetype") # main_cols <- c("rownum", "portal_run_id", "SubjectID", "LibraryID_tumor") main_cols <- c("portal_run_id") +``` + +```{r} +#| label: qcsum qcsum <- d |> filter(filetype == "qcsummarytsv") |> + select(all_of(main_cols), res) |> + unnest_wider(res) +``` + +```{r} +#| label: pcgr +pcgr <- d |> + filter(filetype == "pcgrjson") |> + select(all_of(main_cols), res) |> unnest_wider(res) |> - select(all_of(main_cols), qc_status_hmf:bpi_enabled) + rename( + msi_fraction_indels_pcgr = "fracIndels", + msi_pcgr = "predicted_class", + tmb_pcgr = "tmb_estimate", + n_tmb_pcgr = "n_tmb" + ) |> + mutate(msi_pcgr = sub(" \\(.*\\)", "", msi_pcgr)) +``` + +```{r} +#| label: conpair +sampleids <- d_raw |> + select(SampleID_tumor, SampleID_normal) |> + tidyr::pivot_longer(everything(), values_to = "sampleid") |> + mutate(phenotype = sub("SampleID_", "", .data$name)) |> + select(sampleid, phenotype) |> + distinct() +conpair_raw <- d |> + filter(filetype == "conpairmultiqc") |> + select(all_of(main_cols), res) |> + tidyr::unnest(res) |> + left_join(sampleids, by = "sampleid") |> + select(-sampleid) +conpair_tumor <- conpair_raw |> + filter(phenotype == "tumor") +conpair_normal <- conpair_raw |> + filter(phenotype == "normal") |> + select(portal_run_id, contamination) +conpair <- conpair_tumor |> + left_join(conpair_normal, by = "portal_run_id", suffix = c("_tumor", "_normal")) |> + select(portal_run_id, + contamination_tumor_conpair = "contamination_tumor", + contamination_normal_conpair = "contamination_normal", + concordance_conpair = "concordance" + ) +``` + +```{r} +#| label: hrd hrd_chord <- d |> filter(filetype == "chordtsv") |> unnest_wider(res) |> @@ -84,6 +177,10 @@ hrd_hrdetect <- d |> filter(filetype == "hrdetecttsv") |> unnest_wider(res) |> select(all_of(main_cols), hrdetect_prob = "Probability") +``` + +```{r} +#| label: sigs sigs_snv2015 <- d |> filter(filetype == "sigssnv2015tsv") |> select(all_of(main_cols), res) |> @@ -104,8 +201,6 @@ sigs_indel <- d |> select(all_of(main_cols), res) |> tidyr::unnest_wider(res) |> tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq)) - -# signatures dsig <- bind_rows( list( snv2015 = sigs_snv2015, snv2020 = sigs_snv2020, dbs = sigs_dbs, indel = sigs_indel @@ -128,20 +223,37 @@ dsig_filt <- dsig |> select(Sig_group, portal_run_id, sig_rank, sig_summary) |> tidyr::pivot_wider(names_from = sig_rank, values_from = sig_summary, names_prefix = "rank") |> mutate(sig_top2 = paste(rank1, rank2, sep = ", ")) |> - select(portal_run_id, sig_top2) + select(Sig_group, portal_run_id, sig_top2) |> + tidyr::pivot_wider(names_from = Sig_group, values_from = sig_top2) |> + select(portal_run_id, snv2015, snv2020, dbs, indel) +``` +```{r} +#| label: qc_all dall <- d_raw |> select( + rownum, date_analysed = "start", portal_run_id, SubjectID, LibraryID_tumor, SampleID_tumor, external_subject_id, external_sample_id, - project_owner, project_name, source, quality + project_owner, project_name, source, quality, workflow ) |> left_join(qcsum, by = "portal_run_id") |> left_join(hrd_chord, by = "portal_run_id") |> - left_join(hrd_hrdetect, by = "portal_run_id") + left_join(hrd_hrdetect, by = "portal_run_id") |> + left_join(pcgr, by = "portal_run_id") |> + left_join(conpair, by = "portal_run_id") +``` + +### Summary Metrics + +```{r} +#| label: summary_metrics +dt_view(dall, "summary_metrics") ``` +### HRD Plot + ```{r} #| label: hrd_plot #| fig-width: 15 @@ -159,7 +271,34 @@ p1 <- dall |> plotly::ggplotly(p1) ``` -### Signature Results +### Signatures + +#### All (SNV, Indel, DBS) + +```{r} +#| label: sig_results_all +dsig |> + left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor), + by = "portal_run_id" + ) |> + select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |> + dt_view("sig_results_all") +``` + +#### Top 2 + +```{r} +#| label: sig_results_top2 +dsig_filt |> + left_join(meta |> select(rownum, portal_run_id, SubjectID, LibraryID_tumor), + by = "portal_run_id" + ) |> + select(rownum, portal_run_id, SubjectID, LibraryID_tumor, everything()) |> + arrange(rownum) |> + dt_view("sig_results_top2") +``` + +#### Top 3 SNV2015 ```{r} #| label: sig_results @@ -167,19 +306,19 @@ plotly::ggplotly(p1) #| fig-height: 65 sig_order2015 <- paste0("Sig", 1:30) -sig_order2020 <- paste0( - "SBS", - c( - 1:6, - paste0(7, c("a", "b", "c", "d")), - 8:9, - paste0(10, c("a", "b", "c", "d")), - 11:16, - paste0(17, c("a", "b")), - 18:60, - 84:94 - ) -) +# sig_order2020 <- paste0( +# "SBS", +# c( +# 1:6, +# paste0(7, c("a", "b", "c", "d")), +# 8:9, +# paste0(10, c("a", "b", "c", "d")), +# 11:16, +# paste0(17, c("a", "b")), +# 18:60, +# 84:94 +# ) +# ) p2_prep <- dsig |> filter( @@ -189,12 +328,11 @@ p2_prep <- dsig |> left_join(dall |> select(portal_run_id, date_analysed, SubjectID, LibraryID_tumor), by = "portal_run_id") |> mutate(sbj = as.character(glue("{SubjectID}_{LibraryID_tumor}"))) |> select(date_analysed, sbj, Sig_group, Rank, Signature, Contribution, RelFreq) |> - mutate(Signature = factor(Signature, levels = c(sig_order2015, sig_order2020))) + mutate(Signature = factor(Signature, levels = sig_order2015)) p2 <- p2_prep |> ggplot2::ggplot(aes(x = Contribution, y = sbj, fill = Signature, text = sbj)) + ggplot2::geom_bar(position = "fill", stat = "identity") + ggplot2::theme_bw(base_size = 7) -# ggplot2::facet_wrap(~Sig_group, ncol = 1) plotly::ggplotly(p2, tooltip = c("x", "text", "fill")) ``` @@ -219,6 +357,13 @@ dall |> count(dall, source, quality) |> knitr::kable() ``` +### Workflow + +```{r} +#| label: workflow_summary +count(dall, workflow) |> knitr::kable() +``` + :::