Merge pull request #155 from /issues/153-fusion-order

Arrange fusions df
umccr · Jul 4, 2024 · 9f8d33a · 9f8d33a
2 parents 5c9b57a + 6dc4805
commit 9f8d33a
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 23 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,15 +22,12 @@ Depends:
 Remotes:
     umccr/RNAsum.data
 Imports:
-    AnnotationDbi,
-    AnnotationFilter,
     assertthat,
     conflicted,
     dplyr,
     DT,
     EDASeq,
     edgeR,
-    ensembldb,
     fs,
     ggforce,
     ggplot2,

diff --git a/R/fusions.R b/R/fusions.R
@@ -161,11 +161,6 @@ fusions_table <- function(fusions) {
       )
     ) |>
     dplyr::ungroup() |>
-    # use desc since values are True (1) / False (0), so go from high (1) to low (0)
-    dplyr::arrange(
-      dplyr::desc(.data$geneA_dna_support), dplyr::desc(.data$geneB_dna_support),
-      dplyr::desc(.data$reported_fusion), .data$fusion_caller
-    ) |>
     dplyr::select(dplyr::any_of(
       # any_of handles cases when Arriba fusions are missing so e.g. there is no split_readsA col
       c(

diff --git a/R/utils_shortcuts.R b/R/utils_shortcuts.R
@@ -9,3 +9,18 @@ NULL
 
 #' @keywords internal
 "_PACKAGE"
+
+#' @noRd
+dummy1 <- function() {
+  # Referencing one object from each package listed in Imports avoids the R CMD check NOTE "Namespaces in Imports field not imported from"
+  EDASeq::plotRLE
+  conflicted::conflict_scout
+  edgeR::DGEList
+  ggforce::geom_sina
+  ggplot2::ggplot
+  limma::removeBatchEffect
+  manhattanly::manhattanly
+  optparse::make_option
+  preprocessCore::normalize.quantiles
+  ragg::agg_png
+}
diff --git a/inst/rmd/rnasum.Rmd b/inst/rmd/rnasum.Rmd
@@ -70,13 +70,10 @@ knitr::opts_chunk$set(timeit = TRUE, echo = FALSE)
 # start with more exotic pkgs, then get to core ones
 {
   library(conflicted) # checks for pkg function conflicts
-  library(AnnotationDbi, include.only = c("keys"))
-  library(AnnotationFilter, include.only = c("GeneIdFilter"))
   library(DT, include.only = c("datatable"))
   library(dplyr, include.only = c("mutate", "select", "filter", "if_else"))
   library(EDASeq, include.only = c("plotRLE"))
   library(edgeR, include.only = c("DGEList"))
-  library(ensembldb, include.only = c("lengthOf"))
   library(fs, include.only = c("dir_create"))
   library(glue, include.only = c("glue"))
   library(ggforce, include.only = c("geom_sina"))
@@ -385,8 +382,9 @@ library_size <- d |>
   ggplot2::theme_minimal() +
   ggplot2::scale_y_continuous(breaks = scales::breaks_pretty(8))
 
-PlotsDir <- file.path(results_dir, "InputDataPlots")
-ggplot2::ggsave(file = file.path(PlotsDir, "library_size.png"), plot = library_size)
+PlotsDir <- file.path(results_dir, "InputDataPlots") |>
+  fs::dir_create()
+ggplot2::ggsave(filename = file.path(PlotsDir, "library_size.png"), plot = library_size)
 ```
 
 ```{r data_transformation_filtering, comment = NA, message=FALSE, warning=FALSE}
@@ -1130,7 +1128,7 @@ for (dataset in names(ref_dataset.list)) {
 }
 
 ##### Clean the space
-rm(dat1, data.df, edb, keys)
+rm(dat1, data.df)
 ```
 
 ```{r gene_annot_processed_data, comment = NA, message=FALSE, warning=FALSE}
@@ -1434,7 +1432,16 @@ if (runFusionChunk) {
       geneA_dna_support = dplyr::if_else(cond1, .data$geneA %in% manta_fusions, FALSE),
       geneB_dna_support = dplyr::if_else(cond1, .data$geneB %in% manta_fusions, FALSE)
     ) |>
-    dplyr::ungroup()
+    dplyr::ungroup() |>
+    # use desc so that TRUE (1) sorts before FALSE (0) for logical columns, and higher values come first for counts such as split_reads
+    dplyr::arrange(
+      dplyr::desc(.data$geneA_dna_support),
+      dplyr::desc(.data$geneB_dna_support),
+      dplyr::desc(.data$reported_fusion),
+      dplyr::desc(.data$fusions_cancer),
+      dplyr::desc(.data$split_reads),
+      .data$fusion_caller
+    )
   fusions_ann <- fusions_ann |>
     dplyr::rowwise() |>
     dplyr::mutate(
@@ -2270,13 +2277,11 @@ Cells in [RED]{style="color:#ff0000"} indicate **DNA-supported** fusion genes (s
 
 Fusion events are ordered by the following columns:
 
-- **DNA support (A/B)**: DNA-supported fusion gene(s) (see [Structural variants] section)
-- **Confidence** level from [Arriba](https://arriba.readthedocs.io/en/latest/) tool
-- **Reported fusion**: fusion event reported in [FusionGDB](https://ccsm.uth.edu/FusionGDB)
-- **Split count**: the number of supporting split reads
-- **Pair count**: the number of supporting pair reads
-- **Cancer gene(s)**: gene fusion events involving [Cancer genes]
-- **Fusion gene (A/B)**: gene(s) known to be involved in tumorigenesis across cancer types based on [FusionGDB](https://ccsm.uth.edu/FusionGDB) and [CGI](https://www.cancergenomeinterpreter.org/biomarkers) databases
+- **DNA support (A/B)**: DNA-supported fusion gene(s) (see [Structural variants] section).
+- **Reported fusion**: fusion event reported in [FusionGDB](https://ccsm.uth.edu/FusionGDB).
+- **Cancer gene(s)**: gene fusion events involving [Cancer genes].
+- **Split count**: the total number of supporting split reads.
+- **Fusion caller**: Arriba first, then DRAGEN.
 
 </font>
 
@@ -2451,7 +2456,8 @@ output_density_B <- list()
 output_table_Z <- list()
 output_table_perc <- list()
 
-##### Deal with no genes or when more than 10 genes are of interest
+##### Deal with no genes or when more than X genes are of interest
+# This assumes the fusions df has been sorted accordingly
 if (length(genes) == 0) {
   genes <- NULL
   genes_no <- 0