diff --git a/inst/rmd/umccr_workflows/umccrise/multi.Rmd b/inst/rmd/umccr_workflows/umccrise/multi.Rmd
index 4905c4a..21b7bb2 100644
--- a/inst/rmd/umccr_workflows/umccrise/multi.Rmd
+++ b/inst/rmd/umccr_workflows/umccrise/multi.Rmd
@@ -60,6 +60,7 @@ knitr::opts_chunk$set(
require(ggplot2, include.only = c("ggplot", "aes"))
require(lubridate, include.only = c("as_datetime"))
require(plotly, include.only = c("ggplotly"))
+ require(openssl, include.only = c("sha256"))
}
```
@@ -168,9 +169,21 @@ x5 <- parse_files(gds_map, 2001:2245, file.path(rds_path_out, "x5.rds"))
```{r data_load}
lims_raw <- here("nogit/umccrise/rds/lims/2023-09-04_lims_raw.rds") |>
readr::read_rds()
-dat_s3 <- fs::dir_ls(here("nogit/umccrise/rds/results"), regexp = "s[1-5]{1}.rds") |>
+dat_s3_raw <- fs::dir_ls(here("nogit/umccrise/rds/results"), regexp = "s[1-5]{1}.rds") |>
purrr::map(readr::read_rds) |>
bind_rows()
+# Build a synthetic portal_run_id per S3 result so that separate umccrise runs can be told apart:
+# SHA-256 of the umccrise directory, cut to its first 8 characters, appended to the (dash-free) date folder name.
+dat_s3 <- dat_s3_raw |>
+ mutate(
+ um_dir = sub("s3://umccr-primary-data-prod/(.*)/cancer_report_tables/.*", "\\1", path), # part of `path` between the bucket prefix and /cancer_report_tables/
+ date_dir = basename(dirname(dirname(um_dir))), # path component two levels above the last one (presumably the date folder -- confirm against the S3 layout)
+ date_dir = gsub("-", "", date_dir), # strip dashes
+ hash256 = openssl::sha256(um_dir), # hash the directory string itself, not file contents
+ hash256 = substr(hash256, 1, 8), # keep only the first 8 characters of the hash
+ portal_run_id = glue("fake.{date_dir}{hash256}") # "fake." prefix + date folder + short hash
+ ) |>
+ select(-c(um_dir, date_dir, hash256, SampleID_tumor)) # drop the helper columns and SampleID_tumor
dat_gds <- fs::dir_ls(here("nogit/umccrise/rds/results"), regexp = "x[1-5]{1}.rds") |>
purrr::map(readr::read_rds) |>
bind_rows()
@@ -183,9 +196,10 @@ dat_s3_res <- dat_s3 |>
.default = .data$type
),
date_utc2 = lubridate::as_datetime(.data$date_utc, format = "%Y-%m-%dT%H:%M:%S+00:00"),
- date_analysed_aest = lubridate::with_tz(.data$date_utc2, tz = "Australia/Melbourne")
+ date_analysed_aest = lubridate::with_tz(.data$date_utc2, tz = "Australia/Melbourne"),
+ date_analysed_aest = as.character(.data$date_analysed_aest)
) |>
- select(date_analysed_aest, SubjectID, LibraryID_tumor, SampleID_tumor, type, objp)
+ select(date_analysed_aest, SubjectID, LibraryID_tumor, type, objp, portal_run_id)
dat_gds_res <- dat_gds |>
mutate(
type = case_when(
@@ -195,7 +209,7 @@ dat_gds_res <- dat_gds |>
),
date_analysed_aest = as.character(.data$end),
) |>
- select(date_analysed_aest, SubjectID, LibraryID_tumor, LibraryID_normal, type, objp, portal_run_id)
+ select(date_analysed_aest, SubjectID, LibraryID_tumor, type, objp, portal_run_id)
lims_s3 <- lims_raw |>
filter(LibraryID %in% dat_s3_res$LibraryID_tumor) |>
@@ -207,9 +221,13 @@ lims_gds <- lims_raw |>
distinct()
o1 <- dat_s3_res |>
- left_join(lims_s3, by = c("SubjectID", "LibraryID_tumor" = "LibraryID"))
-#################### UP TO HERE^ ###########################################
-
+ left_join(lims_s3, by = c("SubjectID", "LibraryID_tumor" = "LibraryID")) |>
+ mutate(
+ url = glue("https://portal.umccr.org/subjects/{.data$SubjectID}/overview"),
+ sbj_url = glue("{.data$SubjectID}"),
+ url = glue("{.data$url}")
+ ) |>
+ rename(portal_url = url)
o2 <- dat_gds_res |>
left_join(lims_gds, by = c("SubjectID", "LibraryID_tumor" = "LibraryID")) |>
mutate(
@@ -220,7 +238,11 @@ o2 <- dat_gds_res |>
) |>
rename(portal_url = url)
+d <- list(s3 = o1, gds = o2) |> # stack the S3-derived (o1) and GDS-derived (o2) tables into one
+ bind_rows(.id = "s3_or_gds") # new s3_or_gds column holds the list name ("s3" or "gds") each row came from
+
dt_view <- function(x, scroll_y = 1000, ...) {
+ options(DT.TOJSON_ARGS = list(na = "string"))
x |>
mutate(across(where(is.character), as.factor)) |>
DT::datatable(
@@ -239,10 +261,10 @@ dt_view <- function(x, scroll_y = 1000, ...) {
)
}
-qcsum <- o2 |>
+qcsum <- d |>
filter(type == "UmQcSumFile") |>
tidyr::unnest_wider(objp)
-hrd_chord <- o2 |>
+hrd_chord <- d |>
filter(type == "UmChordTsvFile") |>
tidyr::unnest_wider(objp) |>
select(portal_run_id,
@@ -257,12 +279,12 @@ hrd_chord <- o2 |>
# filter(type == "UmHrdetectTsvFile") |>
# unnest_wider(objp) |>
# select(portal_run_id, hrdetect_prob = Probability)
-sigs_2015 <- o2 |>
+sigs_2015 <- d |>
filter(type == "UmSigsSnvFile2015") |>
tidyr::unnest_wider(objp) |>
select(-c(type)) |>
tidyr::unnest_longer(col = c(Rank, Signature, Contribution, RelFreq))
-sigs_2020 <- o2 |>
+sigs_2020 <- d |>
filter(type == "UmSigsSnvFile2020") |>
tidyr::unnest_wider(objp) |>
select(-c(type)) |>
@@ -274,7 +296,7 @@ sigs_2020 <- o2 |>
```{r final_tab}
cols_select1 <- c(
"date_analysed_aest", "SubjectID", "sbj_url", "LibraryID_tumor", "ExternalSubjectID",
- "ProjectOwner", "ProjectName", "Type", "Workflow", "LibraryID_normal",
+ "ProjectOwner", "ProjectName", "Type", "Workflow",
"hrd_chord", "hrd_hrdetect",
"chord_hr_status", "chord_hrd_type", "chord_p_BRCA1", "chord_p_BRCA2",
"qc_status_hmf", "sex_hmf", "purity_hmf", "ploidy_hmf", "msi_hmf",
@@ -309,10 +331,13 @@ dsig_filt <- dsig |>
dall <- qcsum |>
left_join(hrd_chord, by = "portal_run_id") |>
select(all_of(cols_select1), everything(), -c("type")) |>
- left_join(dsig_filt, by = "portal_run_id")
+ left_join(dsig_filt, by = "portal_run_id") |>
+ relocate(sig_top2, .before = "hrd_chord") |>
+ relocate(s3_or_gds, .after = "SubjectID")
+dt_view(dall)
```
-```{r join_excel_layla}
+```{r join_excel_layla, eval=FALSE}
excel_all <- here("nogit/umccrise/Combined analysis Jan22_Aug23.xlsx") |>
readxl::read_xlsx(sheet = "All")
excel_all |>
@@ -325,7 +350,7 @@ excel_all |>
### HRD Results
-```{r hrd_plot, fig.width=15, fig.height = 10}
+```{r hrd_plot, fig.width=15, fig.height = 15}
p1 <- dall |>
mutate(
sbj = glue("{SubjectID}_{LibraryID_tumor}"),
@@ -348,9 +373,7 @@ plotly::ggplotly(p1)
### Signature Results
-**TODO**
-
-```{r fig.width = 15, fig.height=50, eval=FALSE}
+```{r fig.width = 15, fig.height=65, eval=TRUE}
sig_order2015 <- paste0("Sig", 1:30)
sig_order2020 <- paste0(
"SBS",
@@ -366,8 +389,12 @@ sig_order2020 <- paste0(
)
)
-d2p <- d2 |>
- # filter(Rank %in% c(1:5)) |>
+p2_prep <- dsig |>
+ filter(
+ Sig_group == "s2015",
+ Rank %in% c(1:3)
+ ) |>
+ left_join(dall |> select(portal_run_id, date_analysed_aest, SubjectID, LibraryID_tumor), by = "portal_run_id") |>
mutate(
sbj = as.character(glue("{SubjectID}_{LibraryID_tumor}")),
date = lubridate::as_datetime(date_analysed_aest, format = "%Y-%m-%d %H:%M:%S")
@@ -376,13 +403,14 @@ d2p <- d2 |>
date, sbj, Sig_group, Rank, Signature, Contribution, RelFreq
) |>
mutate(Signature = factor(Signature, levels = c(sig_order2015, sig_order2020)))
-p2 <-
- d2p |>
- ggplot2::ggplot(aes(x = Contribution, y = sbj, fill = Signature, label = sbj)) +
+p2 <- p2_prep |>
+ filter(!grepl("ALLOCATE", sbj)) |> # get rid of ALLOCATE subject
+ ggplot2::ggplot(aes(x = Contribution, y = sbj, fill = Signature, text = sbj)) +
ggplot2::geom_bar(position = "fill", stat = "identity") +
- ggplot2::facet_wrap(~Sig_group, ncol = 1)
+ ggplot2::theme_bw(base_size = 7)
+# ggplot2::facet_wrap(~Sig_group, ncol = 1)
-plotly::ggplotly(p2)
+plotly::ggplotly(p2, tooltip = c("x", "text", "fill"))
```
@@ -391,25 +419,31 @@ plotly::ggplotly(p2)
### ProjectOwner
```{r ProjectOwner}
-count(d1, ProjectOwner) |> dt_view(scroll_y = 400)
+count(dall, ProjectOwner) |> dt_view(scroll_y = 400)
```
### ProjectName
```{r ProjectName}
-count(d1, ProjectName) |> dt_view(scroll_y = 400)
+count(dall, ProjectName) |> dt_view(scroll_y = 400)
```
### Type
```{r Type}
-count(d1, Type) |> dt_view(scroll_y = 400)
+count(dall, Type) |> dt_view(scroll_y = 400)
```
### Workflow
```{r Workflow}
-count(d1, Workflow) |> dt_view(scroll_y = 400)
+count(dall, Workflow) |> dt_view(scroll_y = 400)
+```
+
+### S3orGDS
+
+```{r s3orgds}
+count(dall, s3_or_gds) |> dt_view(scroll_y = 400)
```