Skip to content

Commit

Permalink
update wgtsqc summary; add time metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
pdiakumis committed Dec 17, 2024
1 parent 209b20c commit ecd05c1
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 21 deletions.
6 changes: 3 additions & 3 deletions R/dragen.R
Original file line number Diff line number Diff line change
Expand Up @@ -1081,10 +1081,10 @@ Wf_dragen <- R6::R6Class(
dat <- d |>
dplyr::mutate(
Step = tools::toTitleCase(sub("Time ", "", .data$Step)),
Step = gsub(" |/", "", .data$Step),
Time = substr(.data$time_hrs, 1, 5)
Step = gsub(" |/", "", .data$Step)
# Time = substr(.data$time_hrs, 1, 5)
) |>
dplyr::select("Step", "Time") |>
dplyr::select("Step", Time = "time_sec") |>
tidyr::pivot_wider(names_from = "Step", values_from = "Time") |>
dplyr::relocate("TotalRuntime")
tibble::tibble(name = "timemetrics", data = list(dat))
Expand Down
1 change: 1 addition & 0 deletions dracarys.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Version: 1.0
ProjectId: d8b5f59e-ef53-4e08-821b-50ba53a42df7

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
33 changes: 30 additions & 3 deletions inst/reports/wgts-qc/dl_and_tidy.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION") |>
token <- rportal::orca_jwt() |>
rportal::jwt_validate()
dates <- c(
"2024-12-01"
"2024-12-1"
) |>
stringr::str_remove_all("-") |>
paste(collapse = "|")
Expand Down Expand Up @@ -107,7 +107,7 @@ data_tidy <- wf_lims |>
indir = .data$output_dragenAlignmentOutputUri,
outdir = file.path(sub("s3://", "", .data$indir)),
outdir = fs::as_fs_path(file.path(normalizePath("~/s3"), .data$outdir))
# indir = file.path(outdir, "dracarys_s3_sync"), # for when debugging locally
# indir = outdir # for when debugging locally
) |>
mutate(
data_tidy = list(
Expand All @@ -121,6 +121,33 @@ data_tidy <- wf_lims |>
ungroup()

outdir1 <- fs::dir_create("inst/reports/wgts-qc/nogit/tidy_data_rds")
date1 <- "2024-12-03"
date1 <- "2024-12-17"
data_tidy |>
saveRDS(here(glue("{outdir1}/{date1}_wgts.rds")))

#---- for debugging/changing parsers ----#
data_tidy <- readRDS(here(glue("{outdir1}/{date1}_wgts.rds")))
data_tidy2 <- data_tidy |>
select(-c(indir, outdir, data_tidy)) |>
rowwise() |>
mutate(
indir = .data$output_dragenAlignmentOutputUri,
outdir = file.path(sub("s3://", "", .data$indir)),
outdir = fs::as_fs_path(file.path(normalizePath("~/s3"), .data$outdir)),
indir = outdir, # for when debugging locally
) |>
mutate(
data_tidy = list(
dracarys::dtw_Wf_dragen(
path = .data$indir,
prefix = .data$libraryId,
outdir = .data$outdir,
format = "rds",
max_files = 1000,
dryrun = FALSE
)
)
) |>
ungroup()
data_tidy2 |>
saveRDS(here(glue("{outdir1}/{date1}_wgts.rds")))
51 changes: 36 additions & 15 deletions inst/reports/wgts-qc/summary.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ format:
code-block-border-left: true
smooth-scroll: true
grid:
body-width: 1300px
body-width: 1700px
params:
tidy_data: "~/projects/dracarys/inst/reports/wgts-qc/nogit/tidy_data_rds/2024-12-03_wgts.rds"
tidy_data: "~/projects/dracarys/inst/reports/wgts-qc/nogit/tidy_data_rds/2024-12-17_wgts.rds"
---

```{r}
Expand Down Expand Up @@ -146,17 +146,18 @@ dr_unnest <- function(x1, ...) {

```{r}
#| label: funcs
dt_view <- function(x, caption = NULL, scroll_y = 10 + min(nrow(x) * 35, 570), ...) {
dt_view <- function(x, caption = NULL, scroll_y = 10 + min(nrow(x) * 35, 570), fixedcols = 2, ...) {
x |>
DT::datatable(
filter = list(position = "top", clear = FALSE, plain = TRUE),
class = "cell-border display compact",
rownames = FALSE,
extensions = c("Scroller", "Buttons", "KeyTable"),
extensions = c("Scroller", "Buttons", "KeyTable", "FixedColumns"),
options = list(
scroller = TRUE, scrollY = scroll_y, scrollX = TRUE,
autoWidth = FALSE, keys = TRUE,
buttons = c("csv"), dom = "Blfrtip"
buttons = c("csv"), dom = "Blfrtip",
fixedColumns = list(leftColumns = fixedcols)
),
...
)
Expand Down Expand Up @@ -252,7 +253,7 @@ conf <- list(
pink_range = c(8, 20)
)
d_map |>
dt_view(scroll_y = 1500, escape = FALSE) |>
dt_view(fixedcols = 4, scroll_y = 1500, escape = FALSE) |>
DT::formatCurrency(columns = names(d_map)[num_pct_cols], currency = "", digits = 1) |>
DT::formatCurrency(columns = names(d_map)[no_numpct_cols], currency = "", digits = 0) |>
DT::formatStyle(
Expand Down Expand Up @@ -305,7 +306,7 @@ d_cvg <- dr_unnest("covmetrics_wgs") |>
)) |>
mutate(umccrId = get_lib_url(lid = .data$libraryId, text = .data$umccrId)) |>
select(
"umccrId", "phenotype", "type", "source",
"umccrId", "subjectId", "phenotype", "type", "source",
"quality", "assay",
"workflow", "projectOwnerName", "portalRunId",
ploidy,
Expand All @@ -328,7 +329,7 @@ d_cvg <- dr_unnest("covmetrics_wgs") |>
)
num_cols <- names(d_cvg)[purrr::map_lgl(d_cvg, is.numeric)]
d_cvg |>
dt_view(scroll_y = 1500, escape = FALSE) |>
dt_view(fixedcols = 4, scroll_y = 1500, escape = FALSE) |>
DT::formatCurrency(columns = num_cols, currency = "", digits = 1) |>
DT::formatStyle(
"ploidy",
Expand Down Expand Up @@ -384,7 +385,7 @@ d_tr <- dr_unnest("trimmermetrics") |>
-c("libraryId", "tidy_prefix")
)
d_tr |>
dt_view(escape = FALSE) |>
dt_view(fixedcols = 4, escape = FALSE) |>
DT::formatStyle(
"type",
color = DT::styleEqual(
Expand All @@ -393,6 +394,24 @@ d_tr |>
)
```

### RunTime Metrics

```{r}
#| label: timemetrics
#| fig-height: 13
d_time <- dr_unnest("timemetrics")
d_time_plot <- d_time |>
select(umccrId, type, phenotype, TotalRuntime:dplyr::last_col()) |>
tidyr::pivot_longer(TotalRuntime:dplyr::last_col(), names_to = "STEP", values_to = "RUNTIME_SEC") |>
filter(!is.na(.data$RUNTIME_SEC)) |>
ggplot(aes(x = RUNTIME_SEC, y = STEP, colour = umccrId)) +
geom_point() +
facet_wrap(~type, ncol = 1, scales = "free")
plotly::ggplotly(d_time_plot)
```


## FastQC {.tabset .tabset-pills}

- Plot names used in MultiQC report are shown in **parentheses** for easier comparison.
Expand Down Expand Up @@ -574,7 +593,7 @@ f1 |>

```{r}
#| label: fqc_readLengths
#| fig-height: 8
#| fig-height: 10
read_len <- dr_unnest("fqc_readLengths")
read_len_plot <- read_len |>
group_by(umccrId, mate) |>
Expand All @@ -585,8 +604,9 @@ read_len_plot <- read_len |>
) |>
ungroup() |>
select(umccrId, type, mate, bp, value, tot, prop) |>
ggplot(aes(x = bp, y = prop, colour = umccrId)) +
geom_line(aes(linetype = mate), linewidth = 1) +
ggplot(aes(x = bp, y = prop, colour = umccrId, shape = mate)) +
geom_jitter(width = 0.1, height = 0.1) +
# geom_line(aes(linetype = mate), linewidth = 1) +
theme(
panel.grid.major = element_blank()
) +
Expand All @@ -595,15 +615,16 @@ read_len_plot <- read_len |>
title = "Read Lengths",
subtitle = glue("Read percentage with each observed length.")
)
# plotly::ggplotly(read_len_plot)
read_len_plot
plotly::ggplotly(read_len_plot)
# read_len_plot
```

### Sequence Positions ('Adapter Content')

```{r}
#| label: fqc_sequencePositions
#| fig-height: 80
#| eval: false
f1 <- dr_unnest("fqc_sequencePositions")
f1 |>
ggplot(aes(x = bp, y = value, colour = seq)) +
Expand Down Expand Up @@ -635,7 +656,7 @@ for (type1 in sort(unique(d1$type), decreasing = FALSE)) {
p1 <- d1_type |>
filter(umccrId == s) |>
dracarys::WgsContigMeanCovFile$public_methods$plot() +
ggplot2::labs(subtitle = s)
labs(subtitle = s)
cat(glue("\n#### {s}"), "\n")
blank_lines(2)
print(p1)
Expand Down

0 comments on commit ecd05c1

Please sign in to comment.