Skip to content

Commit

Permalink
alignqc: include topup/rerun info (fixes #128)
Browse files Browse the repository at this point in the history
  • Loading branch information
pdiakumis committed Sep 13, 2024
1 parent 4680506 commit 620d795
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 20 deletions.
21 changes: 17 additions & 4 deletions inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,35 @@ query_limsrow_libids <- function(libids) {
}

# First read in the workflows table, extract the per-workflow metadata,
# then join it with the LIMS rows for the corresponding libraries.
start_date <- "2024-09-09"
p_raw <- query_workflow_alignqc(start_date)

# Keep only the succeeded WGS and WTS alignment QC runs and stack them.
wgs <- p_raw |>
  rportal::meta_wgs_alignment_qc(status = "Succeeded")
wts <- p_raw |>
  rportal::meta_wts_alignment_qc(status = "Succeeded")
p <- bind_rows(wgs, wts)
lims_raw <- query_limsrow_libids(p$LibraryID)

# Split any "_<suffix>" off library_id into topup_or_rerun (NA when there is
# no "_"), so that suffixed rows share the base library_id. The suffix column
# is not selected, and distinct() then collapses the resulting duplicates.
lims <- lims_raw |>
  tidyr::separate_wider_delim(
    library_id,
    delim = "_",
    names = c("library_id", "topup_or_rerun"),
    too_few = "align_start"
  ) |>
  select(
    subject_id, library_id, sample_id, sample_name,
    external_subject_id, external_sample_id,
    project_name, project_owner, phenotype, type,
    source, assay, quality, workflow
  ) |>
  distinct()

# Attach the LIMS annotation to each workflow run and keep the columns used
# downstream; rownum records the row order of the joined table.
d <- p |>
  left_join(lims, by = c("SubjectID" = "subject_id", "LibraryID" = "library_id")) |>
  select(
    "SubjectID", "LibraryID", "SampleID", "lane", "phenotype", "type", "source",
    "assay", "workflow", "external_subject_id", "project_name", "project_owner",
    "start", "end", "portal_run_id", "gds_outdir_dragen", "fq1", "fq2"
  ) |>
  mutate(rownum = row_number())

Expand Down
56 changes: 40 additions & 16 deletions inst/rmd/umccr_workflows/alignment_qc/summary.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ output:
highlight: kate
params:
title: "UMCCR Alignment QC Summary Report"
meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-08-03_wgts.rds")
meta: !r here::here("inst/rmd/umccr_workflows/alignment_qc/nogit/meta/2024-09-09_wgts.rds")
description: "UMCCR Alignment QC Summary Report"
title: "`r params$title`"
---
Expand All @@ -33,14 +33,15 @@ knitr::opts_chunk$set(
```{r load_pkgs}
{
require(dplyr)
require(dracarys, include.only = "session_info_tbls")
require(dracarys, include.only = "session_info_kable")
require(DT, include.only = "datatable")
require(forcats, include.only = "fct_rev")
require(glue, include.only = "glue")
require(here, include.only = "here")
require(plotly, include.only = "ggplotly")
require(purrr, include.only = "map")
require(readr, include.only = "read_rds")
require(stringr, include.only = "str_extract")
require(tibble, include.only = "tibble")
require(knitr, include.only = "kable")
require(ggplot2)
Expand All @@ -50,7 +51,9 @@ knitr::opts_chunk$set(

```{r data_setup}
ggplot2::theme_set(ggplot2::theme_bw())
# Read the metadata table once and derive topup_or_rerun from the fq1 column:
# the first "topup", "rerun" or "rerun2" match, or NA when none is present.
meta <- params[["meta"]] |>
  readr::read_rds() |>
  mutate(topup_or_rerun = stringr::str_extract(fq1, "topup|rerun(2)?"))
# Fail early if any of the listed output directories is missing.
stopifnot(all(dir.exists(meta$outdir)))
options(scipen = 999) # disable scientific notation
options(width = 150)
Expand Down Expand Up @@ -87,7 +90,8 @@ dat <- meta |>
rowwise() |>
mutate(
fpaths = list(filepaths(indir = .data$outdir, sampleid = .data$SampleID)),
umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$lane}")
umccrid = glue("{.data$SubjectID}_{.data$LibraryID}_LN{.data$lane}"),
umccrid = if_else(is.na(.data$topup_or_rerun), .data$umccrid, glue("{umccrid}_{.data$topup_or_rerun}"))
) |>
select("umccrid", "phenotype", "type", "source", "fpaths") |>
tidyr::unnest(fpaths) |>
Expand Down Expand Up @@ -177,7 +181,11 @@ meta |>
SubjectID = get_sbj_url(.data$SubjectID),
durationMin = round(end - start)
) |>
select(SubjectID, type, LibraryID, lane, durationMin, everything(), -c("indir", "outdir", "cmd")) |>
select(
SubjectID, type, LibraryID, lane, durationMin, topup_or_rerun,
everything(),
-c("rownum", "indir", "outdir", "cmd", "fq1", "fq2")
) |>
dt_view(escape = FALSE) |>
DT::formatStyle(
"type",
Expand All @@ -193,7 +201,7 @@ meta |>

```{r mm, eval=eval$MappingMetricsFile}
d_map <- dr_unnest("MappingMetricsFile") |>
arrange(type, desc(umccrid)) |>
arrange(desc(umccrid), type) |>
select(
umccrid, phenotype, type,
source,
Expand Down Expand Up @@ -273,7 +281,7 @@ d_pl_metrics <- d_pl |>
)
# cov_genome_pct_* metrics are in the Hist data, so filter out here
d_cvg <- dr_unnest("WgsCoverageMetricsFile") |>
arrange(type, desc(umccrid)) |>
arrange(desc(umccrid)) |>
left_join(d_pl_metrics, by = c("umccrid", "phenotype", "type", "source")) |>
select(
umccrid, phenotype, type, source,
Expand Down Expand Up @@ -316,7 +324,7 @@ d_cvg |>

```{r trim, eval=eval$TrimmerMetricsFile}
d_tr <- dr_unnest("TrimmerMetricsFile") |>
arrange(type, desc(umccrid)) |>
arrange(desc(umccrid)) |>
select(
umccrid, phenotype, type, source,
reads_tot = reads_tot_input_dragen,
Expand Down Expand Up @@ -449,9 +457,11 @@ plotly::ggplotly(f1_plot)

### Positional Base Content ('Per-Position Sequence Content')

Skip

- TODO: create heatmap instead

```{r fqc_pbc, fig.height=42}
```{r fqc_pbc, eval=F, fig.height=42}
f1 <- dr_unnest("FastqcMetricsFile_positional_base_content")
f1 |>
filter(base != "N") |>
Expand All @@ -472,7 +482,9 @@ f1 |>

### Positional Base Mean Quality ('Per-Position Mean Quality Scores')

```{r fqc_bmq, fig.height=80}
Skip

```{r fqc_bmq, eval=F, fig.height=80}
f1 <- dr_unnest("FastqcMetricsFile_positional_base_mean_quality")
ggplot() +
geom_rect(
Expand All @@ -496,7 +508,9 @@ ggplot() +

### Positional Quality ('Per-Position Quality Score Ranges')

```{r fqc_pq, eval=T, fig.width=13}
Skip

```{r fqc_pq, eval=FALSE, fig.width=13}
# TODO: use boxplot instead of point
f1 <- dr_unnest("FastqcMetricsFile_positional_quality")
quants <- c(25, 50, 75)
Expand Down Expand Up @@ -539,7 +553,9 @@ plotly::ggplotly(read_len_plot)

### Sequence Positions ('Adapter Content')

```{r seq_pos, fig.height=42}
Skip

```{r seq_pos, eval=F, fig.height=42}
f1 <- dr_unnest("FastqcMetricsFile_sequence_positions")
f1 |>
ggplot(aes(x = bp, y = value, colour = seq)) +
Expand All @@ -555,7 +571,9 @@ f1 |>

## Coverage {.tabset .tabset-pills}

```{r contig_cvg, eval=eval$WgsContigMeanCovFile, results='asis', fig.height=5}
Skip

```{r contig_cvg, eval=FALSE, results='asis', fig.height=5}
# Sort by the umccrid column in descending order. Passing the quoted string
# "umccrid" to desc() would sort on a constant and leave the rows unordered.
d1 <- dr_unnest("WgsContigMeanCovFile") |>
  arrange(desc(umccrid))
for (type1 in sort(unique(d1$type), decreasing = FALSE)) {
Expand Down Expand Up @@ -606,7 +624,9 @@ plotly::ggplotly(flp)

- Only for WGS.

```{r pe, eval=eval$PloidyEstimationMetricsFile, fig.height=5}
Skip

```{r pe, eval=F, fig.height=5}
chrom_levels <- c(1:22, "x", "y")
d_pl_plot_data <- d_pl |>
select(
Expand All @@ -631,7 +651,9 @@ plotly::ggplotly(d_pl_plot)

## Hist

```{r cvgm, eval=eval$WgsCoverageMetricsFile, fig.height=8, fig.width=12}
Skip

```{r cvgm, eval=F, fig.height=8, fig.width=12}
d_hist <- dr_unnest("WgsHistFile")
d_hist1 <- d_hist |>
ggplot(aes(x = start, y = pct, colour = umccrid)) +
Expand Down Expand Up @@ -660,7 +682,9 @@ plotly::subplot(d_hist1, d_hist2, shareY = TRUE, titleY = TRUE, titleX = TRUE, n

## FineHist

```{r finehist, eval=eval$WgsFineHistFile, fig.height=10, fig.width=12}
Skip

```{r finehist, eval=FALSE, fig.height=10, fig.width=12}
d_fhist <- dr_unnest("WgsFineHistFile")
d_fhist |>
dracarys::WgsFineHistFile$public_methods$plot(c(0, 150)) +
Expand Down

0 comments on commit 620d795

Please sign in to comment.