From ecd05c1896c4d713948dca5a389881e489ac607e Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Tue, 17 Dec 2024 18:40:17 +1100
Subject: [PATCH] update wgtsqc summary; add time metrics

---
 R/dragen.R                         |  6 ++--
 dracarys.Rproj                     |  1 +
 inst/reports/wgts-qc/dl_and_tidy.R | 33 +++++++++++++++++--
 inst/reports/wgts-qc/summary.qmd   | 51 +++++++++++++++++++++---------
 4 files changed, 70 insertions(+), 21 deletions(-)

diff --git a/R/dragen.R b/R/dragen.R
index 6bb9bea..9db6730 100644
--- a/R/dragen.R
+++ b/R/dragen.R
@@ -1081,10 +1081,10 @@ Wf_dragen <- R6::R6Class(
       dat <- d |>
         dplyr::mutate(
           Step = tools::toTitleCase(sub("Time ", "", .data$Step)),
-          Step = gsub(" |/", "", .data$Step),
-          Time = substr(.data$time_hrs, 1, 5)
+          Step = gsub(" |/", "", .data$Step)
+          # Time = substr(.data$time_hrs, 1, 5)
         ) |>
-        dplyr::select("Step", "Time") |>
+        dplyr::select("Step", Time = "time_sec") |>
         tidyr::pivot_wider(names_from = "Step", values_from = "Time") |>
         dplyr::relocate("TotalRuntime")
       tibble::tibble(name = "timemetrics", data = list(dat))
diff --git a/dracarys.Rproj b/dracarys.Rproj
index 766b3b2..4550325 100644
--- a/dracarys.Rproj
+++ b/dracarys.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: d8b5f59e-ef53-4e08-821b-50ba53a42df7
 
 RestoreWorkspace: No
 SaveWorkspace: No
diff --git a/inst/reports/wgts-qc/dl_and_tidy.R b/inst/reports/wgts-qc/dl_and_tidy.R
index 55663b9..b4aebe7 100755
--- a/inst/reports/wgts-qc/dl_and_tidy.R
+++ b/inst/reports/wgts-qc/dl_and_tidy.R
@@ -18,7 +18,7 @@ c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION") |>
 token <- rportal::orca_jwt() |>
   rportal::jwt_validate()
 dates <- c(
-  "2024-12-01"
+  "2024-12-1"
 ) |>
   stringr::str_remove_all("-") |>
   paste(collapse = "|")
@@ -107,7 +107,7 @@ data_tidy <- wf_lims |>
     indir = .data$output_dragenAlignmentOutputUri,
     outdir = file.path(sub("s3://", "", .data$indir)),
     outdir = fs::as_fs_path(file.path(normalizePath("~/s3"), .data$outdir))
-    # indir = file.path(outdir, "dracarys_s3_sync"), # for when debugging locally
+    # indir = outdir # for when debugging locally
   ) |>
   mutate(
     data_tidy = list(
@@ -121,6 +121,33 @@ data_tidy <- wf_lims |>
   ungroup()
 
 outdir1 <- fs::dir_create("inst/reports/wgts-qc/nogit/tidy_data_rds")
-date1 <- "2024-12-03"
+date1 <- "2024-12-17"
 data_tidy |>
   saveRDS(here(glue("{outdir1}/{date1}_wgts.rds")))
+
+#---- for debugging/changing parsers ----#
+# Re-runs the parsers on the local ~/s3 copy and overwrites the RDS saved above;
+# guarded so a non-interactive run (e.g. Rscript) does not tidy everything twice.
+if (interactive()) {
+  data_tidy <- readRDS(here(glue("{outdir1}/{date1}_wgts.rds")))
+  data_tidy2 <- data_tidy |>
+    select(-c(indir, outdir, data_tidy)) |>
+    rowwise() |>
+    mutate(
+      indir = .data$output_dragenAlignmentOutputUri,
+      outdir = file.path(sub("s3://", "", .data$indir)),
+      outdir = fs::as_fs_path(file.path(normalizePath("~/s3"), .data$outdir)),
+      indir = .data$outdir # for when debugging locally
+    ) |>
+    mutate(
+      data_tidy = list(
+        dracarys::dtw_Wf_dragen(
+          path = .data$indir, prefix = .data$libraryId, outdir = .data$outdir,
+          format = "rds", max_files = 1000, dryrun = FALSE
+        )
+      )
+    ) |>
+    ungroup()
+  data_tidy2 |>
+    saveRDS(here(glue("{outdir1}/{date1}_wgts.rds")))
+}
diff --git a/inst/reports/wgts-qc/summary.qmd b/inst/reports/wgts-qc/summary.qmd
index 13e042f..c88d7d1 100644
--- a/inst/reports/wgts-qc/summary.qmd
+++ b/inst/reports/wgts-qc/summary.qmd
@@ -22,9 +22,9 @@ format:
     code-block-border-left: true
     smooth-scroll: true
     grid:
-      body-width: 1300px
+      body-width: 1700px
 params:
-  tidy_data: "~/projects/dracarys/inst/reports/wgts-qc/nogit/tidy_data_rds/2024-12-03_wgts.rds"
+  tidy_data: "~/projects/dracarys/inst/reports/wgts-qc/nogit/tidy_data_rds/2024-12-17_wgts.rds"
 ---
 
 ```{r}
@@ -146,17 +146,18 @@ dr_unnest <- function(x1, ...) {
 
 ```{r}
 #| label: funcs
-dt_view <- function(x, caption = NULL, scroll_y = 10 + min(nrow(x) * 35, 570), ...) {
+dt_view <- function(x, caption = NULL, scroll_y = 10 + min(nrow(x) * 35, 570), fixedcols = 2, ...) {
   x |>
     DT::datatable(
       filter = list(position = "top", clear = FALSE, plain = TRUE),
       class = "cell-border display compact",
       rownames = FALSE,
-      extensions = c("Scroller", "Buttons", "KeyTable"),
+      extensions = c("Scroller", "Buttons", "KeyTable", "FixedColumns"),
       options = list(
         scroller = TRUE, scrollY = scroll_y, scrollX = TRUE,
         autoWidth = FALSE, keys = TRUE,
-        buttons = c("csv"), dom = "Blfrtip"
+        buttons = c("csv"), dom = "Blfrtip",
+        fixedColumns = list(leftColumns = fixedcols)
       ),
       ...
     )
@@ -252,7 +253,7 @@ conf <- list(
   pink_range = c(8, 20)
 )
 d_map |>
-  dt_view(scroll_y = 1500, escape = FALSE) |>
+  dt_view(fixedcols = 4, scroll_y = 1500, escape = FALSE) |>
   DT::formatCurrency(columns = names(d_map)[num_pct_cols], currency = "", digits = 1) |>
   DT::formatCurrency(columns = names(d_map)[no_numpct_cols], currency = "", digits = 0) |>
   DT::formatStyle(
@@ -305,7 +306,7 @@ d_cvg <- dr_unnest("covmetrics_wgs") |>
   )) |>
   mutate(umccrId = get_lib_url(lid = .data$libraryId, text = .data$umccrId)) |>
   select(
-    "umccrId", "phenotype", "type", "source",
+    "umccrId", "subjectId", "phenotype", "type", "source",
     "quality", "assay",
     "workflow", "projectOwnerName", "portalRunId",
     ploidy,
@@ -328,7 +329,7 @@ d_cvg <- dr_unnest("covmetrics_wgs") |>
   )
 num_cols <- names(d_cvg)[purrr::map_lgl(d_cvg, is.numeric)]
 d_cvg |>
-  dt_view(scroll_y = 1500, escape = FALSE) |>
+  dt_view(fixedcols = 4, scroll_y = 1500, escape = FALSE) |>
   DT::formatCurrency(columns = num_cols, currency = "", digits = 1) |>
   DT::formatStyle(
     "ploidy",
@@ -384,7 +385,7 @@ d_tr <- dr_unnest("trimmermetrics") |>
     -c("libraryId", "tidy_prefix")
   )
 d_tr |>
-  dt_view(escape = FALSE) |>
+  dt_view(fixedcols = 4, escape = FALSE) |>
   DT::formatStyle(
     "type",
     color = DT::styleEqual(
@@ -393,6 +394,24 @@ d_tr |>
   )
 ```
 
+### RunTime Metrics
+
+```{r}
+#| label: timemetrics
+#| fig-height: 13
+# Runtime per step in long format; steps with no recorded value are dropped.
+d_time <- dr_unnest("timemetrics")
+d_time_plot <- d_time |>
+  select(umccrId, type, phenotype, TotalRuntime:dplyr::last_col()) |>
+  tidyr::pivot_longer(
+    TotalRuntime:dplyr::last_col(), names_to = "STEP", values_to = "RUNTIME_SEC", values_drop_na = TRUE
+  ) |>
+  ggplot(aes(x = RUNTIME_SEC, y = STEP, colour = umccrId)) +
+  geom_point() + facet_wrap(~type, ncol = 1, scales = "free")
+plotly::ggplotly(d_time_plot)
+```
+
+
 ## FastQC {.tabset .tabset-pills}
 
 - Plot names used in MultiQC report are shown in **parentheses** for easier comparison.
@@ -574,7 +593,7 @@ f1 |>
 
 ```{r}
 #| label: fqc_readLengths
-#| fig-height: 8
+#| fig-height: 10
 read_len <- dr_unnest("fqc_readLengths")
 read_len_plot <- read_len |>
   group_by(umccrId, mate) |>
@@ -585,8 +604,9 @@ read_len_plot <- read_len |>
   ) |>
   ungroup() |>
   select(umccrId, type, mate, bp, value, tot, prop) |>
-  ggplot(aes(x = bp, y = prop, colour = umccrId)) +
-  geom_line(aes(linetype = mate), linewidth = 1) +
+  ggplot(aes(x = bp, y = prop, colour = umccrId, shape = mate)) +
+  geom_jitter(width = 0.1, height = 0.1) +
+  # geom_line(aes(linetype = mate), linewidth = 1) +
   theme(
     panel.grid.major = element_blank()
   ) +
@@ -595,8 +615,8 @@ read_len_plot <- read_len |>
     title = "Read Lengths",
     subtitle = glue("Read percentage with each observed length.")
   )
-# plotly::ggplotly(read_len_plot)
-read_len_plot
+plotly::ggplotly(read_len_plot)
+# read_len_plot
 ```
 
 ### Sequence Positions ('Adapter Content')
@@ -604,6 +624,7 @@ read_len_plot
 ```{r}
 #| label: fqc_sequencePositions
 #| fig-height: 80
+#| eval: false
 f1 <- dr_unnest("fqc_sequencePositions")
 f1 |>
   ggplot(aes(x = bp, y = value, colour = seq)) +
@@ -635,7 +656,7 @@ for (type1 in sort(unique(d1$type), decreasing = FALSE)) {
     p1 <- d1_type |>
       filter(umccrId == s) |>
       dracarys::WgsContigMeanCovFile$public_methods$plot() +
-      ggplot2::labs(subtitle = s)
+      labs(subtitle = s)
     cat(glue("\n#### {s}"), "\n")
     blank_lines(2)
     print(p1)