Merge pull request #34 from morinlab/kdreval-helpers-setup

Major updates
morinlab · Mar 15, 2024 · 9bcc4fe · 9bcc4fe
2 parents 94920fd + 8839fc1
commit 9bcc4fe
Show file tree

Hide file tree

Showing 53 changed files with 1,046 additions and 188 deletions.
diff --git a/.github/workflows/build_check.yaml b/.github/workflows/build_check.yaml
@@ -0,0 +1,49 @@
+name: GAMBLR.helpers build check
+
+on:
+  pull_request:
+    branches: [master]
+
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -el {0}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Setup Conda
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.11
+
+    - name: Create conda environment
+      uses: conda-incubator/setup-miniconda@v2
+      with:
+        activate-environment: r
+        channels: conda-forge,defaults
+        python-version: 3.11
+        auto-activate-base: false
+        environment-file: envs/r.yaml
+
+    - name: Build package
+      run:
+        Rscript -e "devtools::install()"
+
+    - name: Check package
+      run:
+        Rscript -e "devtools::check(vignettes = FALSE, args = '--no-examples')"
+
+    - name: Upload check results
+      if: failure()
+      uses: actions/upload-artifact@main
+      with:
+        name: ${{ runner.os }}-r${{ matrix.config.r }}-results
+        path: check
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,7 +17,6 @@ Imports:
     ggthemes,
     philentropy,
     readr,
-    reshape2,
     stringr,
     tibble,
     tidyr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -24,7 +24,6 @@ export(maf_header)
 export(normalize_expression_data)
 export(rainfall_conv)
 export(required_cols)
-export(review_hotspots)
 export(sanity_check_metadata)
 export(socketWrite)
 export(standardize_chr_prefix)
@@ -44,7 +43,6 @@ import(workflowr)
 importFrom(dplyr,left_join)
 importFrom(ggthemes,theme_foundation)
 importFrom(philentropy,KL)
-importFrom(reshape2,melt)
 importFrom(stats,end)
 importFrom(stats,quantile)
 importFrom(stats,start)

diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R
@@ -4,7 +4,6 @@
 ## usethis namespace: start
 #' @importFrom dplyr left_join
 #' @importFrom philentropy KL
-#' @importFrom reshape2 melt
 #' @importFrom stats end
 #' @importFrom stats quantile
 #' @importFrom stats start

diff --git a/R/bins_to_bedgraph.R b/R/bins_to_bedgraph.R
@@ -1,3 +1,13 @@
+#' @title Bins to bed graph.
+#'
+#' @description This function will generate a bed file.
+#'
+#' @param bin_df Data frame with bins.
+#' @param min_value Minimum value to retain the bin.
+#' @param filename Path to a local file on drive to save the resulting file.
+#'
+#' @return full path to the file that was written
+#'
 #' @export
 bins_to_bedgraph = function(bin_df,min_value = 3,filename = "test.bed"){
   bed_cols = dplyr::select(bin_df,1,2,3,smoothed_ratio)
@@ -7,8 +17,8 @@ bins_to_bedgraph = function(bin_df,min_value = 3,filename = "test.bed"){
     bed_cols = mutate(bed_cols,chr = paste0("chr",chr))
   }
   bed_cols = dplyr::filter(bed_cols,value > min_value)
-  this = dplyr::filter(chr1p,bin_start==27985000) 
+  this = dplyr::filter(chr1p,bin_start==27985000)
   bed_cols = mutate(bed_cols,end=format(end, scientific=F),start=format(start, scientific=F))
   write.table(bed_cols,row.names=F,col.names=F,quote=F,sep="\t",file=filename)
-
+  return(filename)
 }
diff --git a/R/cache_output.R b/R/cache_output.R
@@ -1,3 +1,16 @@
+#' @title Cache output.
+#'
+#' @description TODO.
+#'
+#' @param result_df TODO.
+#' @param function_name TODO.
+#' @param clobber_mode TODO.
+#' @param get_existing TODO.
+#' @param function_params TODO.
+#' @param additional_details TODO.
+#'
+#' @return full path to the file that was written
+#'
 #' @export
 cache_output = function(result_df,
                         function_name,
@@ -31,4 +44,5 @@ cache_output = function(result_df,
 
   message(paste("creating/overwriting",cache_file_name))
   write_tsv(result_df,file=cache_file_name)
+  return(cache_file_name)
 }
diff --git a/R/calculate_tmb.R b/R/calculate_tmb.R
@@ -1,4 +1,4 @@
-#' @title Return TMB counts.
+#' @title Calculate tumour mutation burden.
 #'
 #' @description This function implements tumor mutation burden calculation.
 #' TODO: add more details.
@@ -31,7 +31,7 @@
 #'     maf1,
 #'     regions_bed = grch37_ashm_regions
 #' )
-#' #' calculate_tmb(
+#' calculate_tmb(
 #'     maf1,
 #'     regions_bed = grch37_ashm_regions,
 #'     subset_to_nonSyn = FALSE

diff --git a/R/check_config_value.R b/R/check_config_value.R
@@ -3,14 +3,14 @@
 #' @description Check the existence of a specific config key.
 #' The function will notify the user and end the program if no such key exists.
 #'
-#' @details INTERNAL FUNCTION for checking the existence of a config value, not meant for out-of-package usage.
+#' @details INTERNAL FUNCTION for checking the existence of a config value,
+#' not meant for out-of-package usage.
 #'
 #' @param config_key key from config, prefixed with config::get()
 #'
-#' @return A string with the path to a file specified in the config or nothing (if config key is NULL).
+#' @return A string with the path to a file specified in the config or nothing
+#' (if config key is NULL).
 #' 
-#' @noRd
-#'
 #' @examples
 #' check_config_value(config::get("resources")$blacklist$template)
 #'

diff --git a/R/check_file_details.R b/R/check_file_details.R
@@ -1,3 +1,13 @@
+#' @title Check file details
+#'
+#' @description When a relative path to a file is given, this function will
+#' automatically generate its full path using the project_base config value
+#' and check for the existence of the file at the full path. Can operate on a
+#' vector of several relative paths.
+#'
+#' @param relative_paths Vector of relative paths.
+#' @return Boolean
+#'
 #' @export
 check_file_details = function(relative_paths){
   not_found = c()

diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R
@@ -1,3 +1,13 @@
+#' @title Compare pattern of coding mutations.
+#'
+#' @description TODO.
+#'
+#' @param maf_df1 TODO.
+#' @param maf_df2 TODO.
+#' @param gene TODO.
+#'
+#' @return list
+#'
 #' @export
 compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
   if(missing(maf_df1) | missing(maf_df2)){

diff --git a/R/copy_no_clobber.R b/R/copy_no_clobber.R
@@ -1,3 +1,13 @@
+#' @title Copy no clobber.
+#'
+#' @description TODO.
+#'
+#' @param from_file TODO.
+#' @param to_file TODO.
+#' @param force TODO.
+#'
+#' @return full path to the file that was written
+#'
 #' @export
 copy_no_clobber = function(from_file,
                            to_file,
@@ -9,4 +19,5 @@ copy_no_clobber = function(from_file,
   if(force){
     file.copy(from_file,to_file)
   }
+  return(to_file)
 }
diff --git a/R/gene_mutation_tally.R b/R/gene_mutation_tally.R
@@ -1,3 +1,22 @@
+#' @title Tally coding mutations load.
+#'
+#' @description Calculate the number of coding mutations per grouping. Silent
+#' variants are excluded from this calculation. The incoming maf may contain
+#' non-coding variants, which will be excluded from tallying.
+#'
+#' @param maf_df Data frame of simple somatic mutations in maf format. Required
+#'      parameter.
+#' @param these_samples_metadata Data frame with metadata. Must contain sample
+#'      identifiers in the `sample_id` column and the column that will be used
+#'      to calculate the mutation load. All other columns are ignored. Required
+#'      parameter.
+#' @param these_genes Vector of hugo symbols for genes to be considered for the
+#'      tallying of mutations. Required parameter.
+#' @param grouping_variable Column in the metadata that will be used as grouping
+#'      variable. By default, the `cohort` is used.
+#'
+#' @return data frame
+#'
 #' @export
 gene_mutation_tally = function(maf_df,these_samples_metadata,these_genes,grouping_variable="cohort"){
   meta = dplyr::select(these_samples_metadata,sample_id,{{grouping_variable}})

diff --git a/R/get_template_wildcards.R b/R/get_template_wildcards.R
@@ -1,3 +1,12 @@
+#' @title Get template wildcards.
+#'
+#' @description TODO.
+#'
+#' @param parent_key TODO.
+#' @param template_key TODO.
+#'
+#' @return list
+#'
 #' @export
 get_template_wildcards = function(parent_key,
                                   template_key){

diff --git a/R/get_unmatched_normals.R b/R/get_unmatched_normals.R
@@ -1,9 +1,33 @@
-#helper function to get the unmatched normals from the main config
+#' @title Get unmatched normals.
+#'
+#' @description Helper function to get the unmatched normals from the main
+#' config.
+#'
+#' @param seq_type_filter Seq type key from config for which to return the
+#'      unmatched normals.
+#'
+#' @return data frame
+#'
+#' @import dplyr tidyr tibble
 #' @export
 get_unmatched_normals = function(seq_type_filter){
-  a = check_config_value(config::get("unmatched_normal_ids"))
-  df = melt(a,value.name="normal_sample_id") %>%
-    rename(c("genome_build"="L3","seq_type"="L2","unix_group"="L1")) %>%
-    dplyr::filter(seq_type == seq_type_filter)
-  return(df)
+    a <- check_config_value(config::get("unmatched_normal_ids"))
+    df <- a %>%
+        as.data.frame(
+            check.names = FALSE
+        ) %>%
+        t %>%
+        as.data.frame() %>%
+        tibble::rownames_to_column("rownames") %>%
+        tidyr::separate(
+            rownames,
+            into = c("unix_group", "seq_type", "genome_build"),
+            sep = "\\."
+        ) %>%
+        dplyr::select(
+            "normal_sample_id" = "V1",
+            genome_build, seq_type, unix_group
+        ) %>%
+        dplyr::filter(seq_type == seq_type_filter)
+    return(df)
 }
diff --git a/R/grob_wildcards.R b/R/grob_wildcards.R
@@ -1,4 +1,11 @@
-#Helper functions not for export
+#' @title Glob wildcards.
+#'
+#' @description Helper function to extract wildcard values from a string.
+#'
+#' @param wildcarded_string String containing wildcards inside the {} notations.
+#'
+#' @return vector
+#'
 #' @export
 grob_wildcards = function(wildcarded_string){
   wildcards = unlist(stringr::str_extract_all(wildcarded_string,"\\{[^\\{]+\\}"))

diff --git a/R/handle_metadata.R b/R/handle_metadata.R
@@ -12,8 +12,6 @@
 #'
 #' @import GAMBLR.data dplyr
 #'
-#' @noRd
-#'
 #' @export
 handle_metadata = function(this_seq_type = "genome") {
     if ("GAMBLR.data" %in% installed.packages()) {