From 5c0bd33b332a24a0e4e209a52b7459f214cc50fa Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 5 Dec 2018 14:23:08 +0800 Subject: [PATCH 01/32] Adding Windows and OSX testing Former-commit-id: 5ac51382d5446f6387f24509cf8977a9e0df5122 --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index bd4baca..5a136d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,11 @@ r: - devel cache: packages +os: + - linux + - windows + - osx + env: - _R_CHECK_FORCE_SUGGESTS_=FALSE From f3c81ee788f793c4cd102e02761d62d83554cfaf Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 5 Dec 2018 14:57:30 +0800 Subject: [PATCH 02/32] checking Linux (incl devel) and Mac (excl devel) Former-commit-id: 9b0d3b365166aeb1f19f173bee2e6f7f239535bd --- .travis.yml | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5a136d9..a06a673 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,22 @@ language: r -r: - - oldrel - - release - - devel -cache: packages -os: - - linux - - windows - - osx +# Test both linux and mac, but not devel release on mac +matrix: + include: + - os: linux + r: oldrel + - os: linux + r: release + - os: linux + r: devel + - os: osx + r: oldrel + - os: osx + r: release + #- os: osx + # r: devel + +cache: packages env: - _R_CHECK_FORCE_SUGGESTS_=FALSE From 393a68f88ec17d9c295812d1fd9af4a2633bcfb7 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 5 Dec 2018 15:03:28 +0800 Subject: [PATCH 03/32] allow osx fails on R devel Former-commit-id: c13d88cc551bcd1261d0ffe00c04de4fb51a6342 --- .travis.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index a06a673..cc9aa2d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: r -# Test both linux and mac, but not devel release on mac +# Test both linux and mac matrix: include: - os: linux @@ -13,8 +13,12 @@ matrix: r: oldrel - os: osx r: release - #- os: osx - # r: devel + - os: osx + r: devel + allow_failures: + # devel on osx fails + - os: osx + r: devel cache: packages From be5a2c620db9bdabd652285c047aa29798cd71ff Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 5 Dec 2018 15:07:43 +0800 Subject: [PATCH 04/32] windows OS check for future Former-commit-id: 5f14e081b6ac9ace4e9edbc2ebac74f06ab1ac11 --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index cc9aa2d..0d02ade 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,10 +15,14 @@ matrix: r: release - os: osx r: devel + - os: windows + r: release allow_failures: # devel on osx fails - os: osx r: devel + - os: windows + r: release cache: packages From 7fb4f7db8fa2f4cf777dc465e52339dec0136fb1 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 12 Dec 2018 12:43:12 +0800 Subject: [PATCH 05/32] making a simple ggplot method Former-commit-id: 67720866bf50270fb77c2854231945c187f29a24 --- R/ggplot.exstra_score.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 R/ggplot.exstra_score.R diff --git a/R/ggplot.exstra_score.R b/R/ggplot.exstra_score.R new file mode 100644 index 0000000..a26307b --- /dev/null +++ b/R/ggplot.exstra_score.R @@ -0,0 +1,14 @@ +library(data.table) +library(exSTRa) +library(ggplot2) + +ggplot.exstra_score <- function(data = NULL, ...) { + + ggplot(data$data, + ...) +} + +ggplot(exstra_wgs_pcr_2, aes(x = rep, group = sample)) + stat_ecdf() + +ggplot(exstra_wgs_pcr_2, aes(x = rep, colour = sample)) + stat_ecdf(aes(colour = sample)) + + facet_wrap(~locus) From 5076bb8939429d40fc76da7714910118b6ce9e35 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 18 Jan 2019 13:25:38 +0800 Subject: [PATCH 06/32] ggplot2 imports Former-commit-id: c8ac66624a3092467afbf2dff3cdd89a93c54afb --- DESCRIPTION | 10 +++++++++- NAMESPACE | 3 +++ R/ggplot.exstra_score.R | 27 ++++++++++++++++++--------- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4bcb072..be246ef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,14 @@ Author: Rick Tankard Maintainer: Rick Tankard Description: Detecting expansions with paired-end Illumina sequencing data. License: GPL-2 -Imports: testit, data.table, stringr, reshape2, magrittr, RColorBrewer +Imports: + testit, + data.table, + stringr, + reshape2, + magrittr, + RColorBrewer, + ggplot2 Suggests: testthat, DiagrammeR, DiagrammeRsvg, xlsx, knitr, rmarkdown @@ -24,6 +31,7 @@ Collate: 'exstra_wgs_pcr_2.R' 'filter_low_scores.R' 'filter_sex.R' + 'ggplot.exstra_score.R' 'loci_normal_expansion.R' 'munoz_rueda_al1.R' 'p_values.R' diff --git a/NAMESPACE b/NAMESPACE index a2b5798..611979a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ S3method(copy,exstra_db) S3method(copy,exstra_score) S3method(copy,exstra_tsum) S3method(dim,exstra_score) +S3method(ggplot,exstra_score) S3method(length,exstra_score) S3method(loci,exstra_db) S3method(loci_text_info,exstra_db) @@ -26,6 +27,7 @@ export(copy) export(exstra_db_text) export(filter_low_scores) export(filter_sex) +export(ggexstra) export(is.exstra_db) export(is.exstra_score) export(is.exstra_tsum) @@ -47,6 +49,7 @@ export(suggested_exstra_pipeline) export(tsum_p_value_summary) export(tsum_test) import(data.table) +import(ggplot2) import(magrittr) import(parallel) import(stringr) diff --git a/R/ggplot.exstra_score.R b/R/ggplot.exstra_score.R index a26307b..2bda576 100644 --- a/R/ggplot.exstra_score.R +++ b/R/ggplot.exstra_score.R @@ -1,14 +1,23 @@ -library(data.table) -library(exSTRa) -library(ggplot2) +#' @import ggplot2 +#' @include exstra_wgs_pcr_2.R +NULL +#' @export ggplot.exstra_score <- function(data = NULL, ...) { - - ggplot(data$data, - ...) + #TODO: verify keys + ggplot(data, ...) } -ggplot(exstra_wgs_pcr_2, aes(x = rep, group = sample)) + stat_ecdf() +#' @export +ggexstra <- function(x, mapping = aes(x = rep, colour = sample), ...) { + ggplot(x, mapping = mapping, ...) + stat_ecdf() +} + + + +# ggplot(exstra_wgs_pcr_2, aes(x = rep, group = sample)) + stat_ecdf() + +# ggplot(exstra_wgs_pcr_2, aes(x = rep, colour = sample)) + stat_ecdf(aes(colour = sample)) + +# facet_wrap(~locus) -ggplot(exstra_wgs_pcr_2, aes(x = rep, colour = sample)) + stat_ecdf(aes(colour = sample)) + - facet_wrap(~locus) +# ggexstra(exstra_wgs_pcr_2) From d56f6627667fb097a038addeba95a38d61accc63 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 18 Jan 2019 13:26:24 +0800 Subject: [PATCH 07/32] prep for public data trio download From GIAB AshkenazimTrio Former-commit-id: 1c2ef1ceea68893c4423c28eef0148095c26963c --- inst/tools/giab_read_extraction.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 inst/tools/giab_read_extraction.R diff --git a/inst/tools/giab_read_extraction.R b/inst/tools/giab_read_extraction.R new file mode 100644 index 0000000..826440b --- /dev/null +++ b/inst/tools/giab_read_extraction.R @@ -0,0 +1,19 @@ +library(data.table) +library(exSTRa) +library(glue) + +# Write bed file of reads to extract +known_extraction_bed <- exstra_known$db[, .(chrom, start = chromStart - 1001, end = chromEnd + 1000)] +readr::write_tsv(known_extraction_bed, path = "inst/extdata/known_bam_extract.bed") + +# Extract reads using external tool samtools +bamfile <- "ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG002_NA24385_son/NIST_HiSeq_HG002_Homogeneity-10953946/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG002.hs37d5.300x.bam" +system(glue("bash samtools view -L inst/extdata/known_bam_extract.bed {bamfile}")) + +# Also do this with: +# ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG003_NA24149_father/NIST_HiSeq_HG003_Homogeneity-12389378/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG003.hs37d5.300x.bam +# ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG004_NA24143_mother/NIST_HiSeq_HG004_Homogeneity-14572558/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG004.hs37d5.300x.bam + +# Filter reads that do not have both pairs (preventing warnings) + + From e4e4e9116d480a5083b1ad1b60cb48adf867e3e4 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 18 Jan 2019 13:27:08 +0800 Subject: [PATCH 08/32] add TODO for verification function Former-commit-id: a51efd4e3822ff064d139595951db80f35d7cb33 --- R/CLASS_exstra_score.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/CLASS_exstra_score.R b/R/CLASS_exstra_score.R index 3b004e7..f5a0669 100644 --- a/R/CLASS_exstra_score.R +++ b/R/CLASS_exstra_score.R @@ -346,3 +346,6 @@ as.exstra_score <- function(x, copy = FALSE) { } structure(list(data = x$data, db = x$db, input_type = x$input_type, samples = x$samples), class = c("exstra_score", "exstra_db")) } + + +#TODO: create verification function From b6c6b8c999355a6da340b4fa76cea87fb4b2e4a1 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 23 Jan 2019 14:17:11 +0800 Subject: [PATCH 09/32] added verification functions for key checks Classes: exstra_db exstra_score Checks keys are set properly Former-commit-id: f714b1551b02556acbf3fe26806f650390d329ad --- R/CLASS_exstra_db.R | 9 ++++++++- R/CLASS_exstra_score.R | 8 +++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/R/CLASS_exstra_db.R b/R/CLASS_exstra_db.R index 7c8e8db..394d69d 100644 --- a/R/CLASS_exstra_db.R +++ b/R/CLASS_exstra_db.R @@ -123,4 +123,11 @@ copy.exstra_db <- function(x, ...) { #TODO length(exstra_db) = number of loci -#TODO dim(exstra_db) = dim(exstra_db$db) \ No newline at end of file +#TODO dim(exstra_db) = dim(exstra_db$db) + +# Verify keys of exstra_db +verify.exstra_db <- function(x) { + assert("Object should inherit from class exstra_db.", is.exstra_db(x)) + assert("Key of x$db should be 'locus'", key(x$db) == "locus") + invisible(TRUE) +} diff --git a/R/CLASS_exstra_score.R b/R/CLASS_exstra_score.R index f5a0669..05ad78f 100644 --- a/R/CLASS_exstra_score.R +++ b/R/CLASS_exstra_score.R @@ -348,4 +348,10 @@ as.exstra_score <- function(x, copy = FALSE) { } -#TODO: create verification function +# Verify keys of exstra_score +verify.exstra_score <- function(x) { + assert("Object should inherit from class exstra_score.", is.exstra_score(x)) + assert("Key of x$data should be 'locus', 'sample'", identical(key(x$data), c("locus", "sample"))) + assert("Key of x$samples should be 'sample'.", key(x$samples) == "sample") + verify.exstra_db(exstra_wgs_pcr_2) +} From 15e952a085890175108c3c09a48bad900e21d122 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Wed, 23 Jan 2019 14:17:42 +0800 Subject: [PATCH 10/32] ggplot method now merges with samples Former-commit-id: 8135b64d591134af1e3221d45fc7a812e9ac9bcb --- R/ggplot.exstra_score.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ggplot.exstra_score.R b/R/ggplot.exstra_score.R index 2bda576..da97ee5 100644 --- a/R/ggplot.exstra_score.R +++ b/R/ggplot.exstra_score.R @@ -4,8 +4,10 @@ NULL #' @export ggplot.exstra_score <- function(data = NULL, ...) { - #TODO: verify keys - ggplot(data, ...) + verify.exstra_score(data) + # Merge sample data into main data + data <- merge(data$data, data$samples, by = "sample") + ggplot(data, ...) } #' @export From a8270f59e7aa9f274f2633940c6fb3db9f53ebb9 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Mon, 20 May 2019 16:26:26 +0800 Subject: [PATCH 11/32] small bugs in exSTRa repeat db file generation tool Former-commit-id: c498c0ce061c28feae7d5d4dbb20694bbff554ea --- inst/tools/prepare_exSTRa_input_db.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tools/prepare_exSTRa_input_db.R b/inst/tools/prepare_exSTRa_input_db.R index 746b733..8b4734f 100755 --- a/inst/tools/prepare_exSTRa_input_db.R +++ b/inst/tools/prepare_exSTRa_input_db.R @@ -25,7 +25,7 @@ humandb_annovar_dir <- ".../path/to/.../humandb" # --------------------------------------------------------------------------------------- # Read simpleRepeat table -simpleRepeat <- readr::read_delim("simpleRepeat.txt.gz", delim="\t", col_names=FALSE) +simpleRepeat <- readr::read_delim(simpleRepeat_file, delim="\t", col_names=FALSE) colnames(simpleRepeat) <- c("bin", "chrom", "chromStart", "chromEnd", "name", "period", "copyNum", "consensusSize", "perMatch", "perIndel", "score", "A", "C", "G", "T", "entropy", "sequence") simpleRepeat <- as.data.frame(simpleRepeat, stringsAsFactors=FALSE) @@ -48,7 +48,7 @@ simpleRepeat_avinput$alt <- "0" write.table(simpleRepeat_avinput, file="simpleRepeat.avinput", sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) # Run ANNOVAR -system(past0("perl ", table_annovar_script, " simpleRepeat.avinput ", humandb_annovar_dir, " -buildver hg19 -out simpleRepeat_annovar -remove -protocol refGene -operation g -nastring .")) +system(paste0("perl ", table_annovar_script, " simpleRepeat.avinput ", humandb_annovar_dir, " -buildver hg19 -out simpleRepeat_annovar -remove -protocol refGene -operation g -nastring .")) # Load ANNOVAR input simpleRepeat_annovar <- read.delim("simpleRepeat_annovar.hg19_multianno.txt", sep="\t", header=TRUE, stringsAsFactors=FALSE) From db9981f9862e7a901767c52ad74abc7ab9d2547e Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Mon, 20 May 2019 16:43:45 +0800 Subject: [PATCH 12/32] build bug fix Former-commit-id: 4bdb256beb7973fa80fc11c22572132c0567371a --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c7c0c10..ce4e122 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Imports: stringr (>= 1.2.0), reshape2 (>= 1.4.3), magrittr (>= 1.5), - RColorBrewer (>= 1.1-2) + RColorBrewer (>= 1.1-2), ggplot2 Suggests: testthat (>= 2.0.0), From b182282cb8cbaa5c34be3925ce1d26c9e2865602 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Thu, 23 May 2019 12:09:36 +0800 Subject: [PATCH 13/32] so code doesn't run Former-commit-id: 5dfd91964e54e4eafff7339edf2c110e5ba15a0e --- R/munoz_rueda_al1.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/R/munoz_rueda_al1.R b/R/munoz_rueda_al1.R index 2af7086..8b98476 100644 --- a/R/munoz_rueda_al1.R +++ b/R/munoz_rueda_al1.R @@ -74,26 +74,26 @@ munoz_rueda_al1_include <- function(x, extra.missing = 0, ...) { } } -munoz_rueda_al1(c(1, 5, 7, 5, 4, 8, 3)) - -munoz_rueda_al1(c(1, 5, 7, 5, 4, NA, 8, 3)) - -munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3)) - -munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3), 10) -munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3), 10, names = FALSE) - -munoz_rueda_al1_include(c(1, 5, 7, NA, 5, NA, 4, 8, 3), 10) - -munoz_rueda_al1_include(c(1, 5, 7, NA, 5, NA, 4, 8, 3)) - -munoz_rueda_al1_include(c(NA, NA, 1, 5, 7, NA, 5, NA, 4, 8, NA, 3)) - -munoz_rueda_al1_include(numeric(0), 15) +# munoz_rueda_al1(c(1, 5, 7, 5, 4, 8, 3)) +# +# munoz_rueda_al1(c(1, 5, 7, 5, 4, NA, 8, 3)) +# +# munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3)) +# +# munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3), 10) +# munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3), 10, names = FALSE) +# +# munoz_rueda_al1_include(c(1, 5, 7, NA, 5, NA, 4, 8, 3), 10) +# +# munoz_rueda_al1_include(c(1, 5, 7, NA, 5, NA, 4, 8, 3)) +# +# munoz_rueda_al1_include(c(NA, NA, 1, 5, 7, NA, 5, NA, 4, 8, NA, 3)) +# +# munoz_rueda_al1_include(numeric(0), 15) # Example where different k is chosen -munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3)) -munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 8, 3)) +# munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 4, 8, 3)) +# munoz_rueda_al1(c(1, 5, 7, NA, 5, NA, 8, 3)) # y <- c(31L, 31L, 34L, 26L, 16L, 27L, 27L, 34L, 27L, 33L, 24L, 11L, From a026f58443c769b37f8bf42afb031f039d99b975 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Thu, 23 May 2019 12:09:56 +0800 Subject: [PATCH 14/32] parallizing by locus Former-commit-id: a67dd8c7bbb2a4766a38f3001831f9b263c60060 --- R/tsum_test.R | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/R/tsum_test.R b/R/tsum_test.R index 829a8a6..f120449 100644 --- a/R/tsum_test.R +++ b/R/tsum_test.R @@ -165,20 +165,32 @@ tsum_test <- function(strscore, } # Generate T sum statistic - T_stats_list <- vector('list', length(loci(strscore))) - names(T_stats_list) <- loci(strscore) - for(loc in loci(strscore)) { - message("Working on locus ", loc) - strscore_loc <- strscore[loc] - T_stats_loc <- tsum_statistic_1locus(strscore_loc, min.quant = min.quant, - case_control = case_control, trim = trim, - give.pvalue = give.pvalue, B = B, - parallel = parallel, # TRUE for cluster - cluster = cluster, # a cluster object - early_stop = early_stop, early_A = early_A, min_stop = early_stop_min - ) + if(parallel) { + X_loci <- loci(strscore) + names(X_loci) <- X_loci + T_stats_list <- parLapply(cl = cluster, + X_loci, + tsum_statistic_1locus, + strscore, + min.quant = min.quant, + case_control = case_control, trim = trim, + give.pvalue = give.pvalue, B = B, + early_stop = early_stop, early_A = early_A, min_stop = early_stop_min) - T_stats_list[[loc]] <- T_stats_loc + } else { + T_stats_list <- vector('list', length(loci(strscore))) + names(T_stats_list) <- loci(strscore) + for(loc in loci(strscore)) { + message("Working on locus ", loc) + T_stats_loc <- tsum_statistic_1locus(loc, strscore, + min.quant = min.quant, + case_control = case_control, trim = trim, + give.pvalue = give.pvalue, B = B, + early_stop = early_stop, early_A = early_A, min_stop = early_stop_min + ) + + T_stats_list[[loc]] <- T_stats_loc + } } T_stats <- rbindlist(T_stats_list, idcol = "locus") @@ -222,7 +234,8 @@ tsum_test <- function(strscore, #' @import testit #' @import parallel tsum_statistic_1locus <- function( - strscore_loc, + loc, + strscore, case_control = FALSE, min.quant = 0, trim = 0, @@ -234,6 +247,7 @@ tsum_statistic_1locus <- function( early_A = 0.25, min_stop = 50) { + strscore_loc <- strscore[loc] qm <- make_quantiles_matrix(strscore_loc, sample = NULL, method = "quantile7") From 174061b3922f8ae51f6e3bd5712aeb9a3e281d6b Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Thu, 23 May 2019 12:25:44 +0800 Subject: [PATCH 15/32] copies whole strscore object, but without apparent benefit Former-commit-id: 42d7f0498723fd9066e7a2508d7314708b236d1f --- R/tsum_test.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/tsum_test.R b/R/tsum_test.R index f120449..c492db0 100644 --- a/R/tsum_test.R +++ b/R/tsum_test.R @@ -166,12 +166,16 @@ tsum_test <- function(strscore, # Generate T sum statistic if(parallel) { - X_loci <- loci(strscore) - names(X_loci) <- X_loci + all_loci <- loci(strscore) + names(all_loci) <- all_loci + # Save copying the whole object each time + tsum_statistic_1locus_insider <- function(...) { + tsum_statistic_1locus(strscore = get("strscore", envir = .GlobalEnv), ...) + } + clusterExport(cl = cluster, c("strscore", "tsum_statistic_1locus_insider"), envir=environment()) T_stats_list <- parLapply(cl = cluster, - X_loci, - tsum_statistic_1locus, - strscore, + all_loci, + tsum_statistic_1locus_insider, min.quant = min.quant, case_control = case_control, trim = trim, give.pvalue = give.pvalue, B = B, @@ -182,6 +186,8 @@ tsum_test <- function(strscore, names(T_stats_list) <- loci(strscore) for(loc in loci(strscore)) { message("Working on locus ", loc) + # The following uses the strscore variable from the environment + T_stats_loc <- tsum_statistic_1locus(loc, strscore, min.quant = min.quant, case_control = case_control, trim = trim, @@ -235,7 +241,7 @@ tsum_test <- function(strscore, #' @import parallel tsum_statistic_1locus <- function( loc, - strscore, + strscore, case_control = FALSE, min.quant = 0, trim = 0, From 620fcca6c047f3ff919d8d345ee5beedb659e188 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Thu, 23 May 2019 12:35:35 +0800 Subject: [PATCH 16/32] limit large copies for ParLapply Former-commit-id: 97cc9f65b56559cd34c5d7f9dde378d6cb4e1fc0 --- R/tsum_test.R | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/R/tsum_test.R b/R/tsum_test.R index c492db0..4e665c8 100644 --- a/R/tsum_test.R +++ b/R/tsum_test.R @@ -169,13 +169,10 @@ tsum_test <- function(strscore, all_loci <- loci(strscore) names(all_loci) <- all_loci # Save copying the whole object each time - tsum_statistic_1locus_insider <- function(...) { - tsum_statistic_1locus(strscore = get("strscore", envir = .GlobalEnv), ...) - } - clusterExport(cl = cluster, c("strscore", "tsum_statistic_1locus_insider"), envir=environment()) + strscore_loc_list <- lapply(all_loci, function(loc) {strscore[loc]}) T_stats_list <- parLapply(cl = cluster, - all_loci, - tsum_statistic_1locus_insider, + strscore_loc_list, + tsum_statistic_1locus, min.quant = min.quant, case_control = case_control, trim = trim, give.pvalue = give.pvalue, B = B, @@ -186,9 +183,8 @@ tsum_test <- function(strscore, names(T_stats_list) <- loci(strscore) for(loc in loci(strscore)) { message("Working on locus ", loc) - # The following uses the strscore variable from the environment - - T_stats_loc <- tsum_statistic_1locus(loc, strscore, + T_stats_loc <- tsum_statistic_1locus( + strscore_loc = strscore[loc], min.quant = min.quant, case_control = case_control, trim = trim, give.pvalue = give.pvalue, B = B, @@ -240,8 +236,7 @@ tsum_test <- function(strscore, #' @import testit #' @import parallel tsum_statistic_1locus <- function( - loc, - strscore, + strscore_loc, case_control = FALSE, min.quant = 0, trim = 0, @@ -253,7 +248,6 @@ tsum_statistic_1locus <- function( early_A = 0.25, min_stop = 50) { - strscore_loc <- strscore[loc] qm <- make_quantiles_matrix(strscore_loc, sample = NULL, method = "quantile7") From bb9df80d306845b0108111b7f7b05ed119dc79fa Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Thu, 23 May 2019 12:55:15 +0800 Subject: [PATCH 17/32] OSX R devel now succeeds, so now this build must pass Former-commit-id: 6d5387f7a6f036c86464d8c69dcb83018ace6216 --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 76ce106..1af3714 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,9 +18,6 @@ matrix: - os: windows r: release allow_failures: - # devel on osx fails - - os: osx - r: devel - os: windows r: release From 16c872ad6af7925034942735a37755235631cf96 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Thu, 23 May 2019 16:47:09 +0800 Subject: [PATCH 18/32] correcting manual page for plot.exstra_score Former-commit-id: a909a674c8204efb04e5964844917c191c711e67 --- R/CLASS_exstra_score.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CLASS_exstra_score.R b/R/CLASS_exstra_score.R index 05ad78f..8ce697e 100644 --- a/R/CLASS_exstra_score.R +++ b/R/CLASS_exstra_score.R @@ -176,7 +176,7 @@ plot_names.exstra_score <- function(x, names = NULL) { #' Plot ECDF curves from an exstra_score object #' #' -#' @param rsc exstra_score object. +#' @param x exstra_score object. #' @param loci character; Only plot at the loci specified/ #' No effect if NULL. #' @param sample_col Specify the colours for the given samples as a named character vector. From 590c7676e5ffb7f7d84ae294ccfab98eee16b17d Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 11:43:11 +0800 Subject: [PATCH 19/32] tsum test manual page fix Former-commit-id: f3d239a52df91c6d37d6f36156b5f4a6de143ba3 --- R/tsum_test.R | 4 ++-- man/tsum_test.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tsum_test.R b/R/tsum_test.R index 4e665c8..c164b4f 100644 --- a/R/tsum_test.R +++ b/R/tsum_test.R @@ -27,9 +27,9 @@ #' required cluster. #' If cluster is specified then this option makes no difference. #' @param cluster_n If parallel is TRUE, then the number of nodes in the cluster is -#' automatically set as 1 less than those available on your machine. +#' automatically set as half of those available on your machine #' (but never less than 1). This option allows manual setting of the -#' number of nodes, either less to free up other resources, or more to +#' number of nodes, either less to free up other resources, or to #' maximize available resources. #' If cluster is specified then this option makes no difference. #' @param cluster A cluster object from the parallel package. Use if you wish to set up diff --git a/man/tsum_test.Rd b/man/tsum_test.Rd index 90d272c..81c7830 100644 --- a/man/tsum_test.Rd +++ b/man/tsum_test.Rd @@ -49,9 +49,9 @@ required cluster. If cluster is specified then this option makes no difference.} \item{cluster_n}{If parallel is TRUE, then the number of nodes in the cluster is - automatically set as 1 less than those available on your machine. + automatically set as half of those available on your machine (but never less than 1). This option allows manual setting of the - number of nodes, either less to free up other resources, or more to + number of nodes, either less to free up other resources, or to maximize available resources. If cluster is specified then this option makes no difference.} From e80a0a425edc9dde75ddaf39610b47e0e3a7dd23 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 11:44:06 +0800 Subject: [PATCH 20/32] fix plot.exstra_score parameter name Former-commit-id: 10d810490f1b7e7884bc9fef6d8a064ec640f569 --- man/plot.exstra_score.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/plot.exstra_score.Rd b/man/plot.exstra_score.Rd index 18ca7b2..01288cc 100644 --- a/man/plot.exstra_score.Rd +++ b/man/plot.exstra_score.Rd @@ -11,6 +11,8 @@ main = NULL, ...) } \arguments{ +\item{x}{exstra_score object.} + \item{loci}{character; Only plot at the loci specified/ No effect if NULL.} @@ -47,8 +49,6 @@ If FALSE, samples with missing sex will also be filtered when xlinked is "male" upper x-axis limit.} \item{...}{Further arguments to the \code{plot.ecdf} function.} - -\item{rsc}{exstra_score object.} } \description{ Plot ECDF curves from an exstra_score object From ac7a0b480eefbb608e87df7ce56aa7273c5fcb9f Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 11:44:23 +0800 Subject: [PATCH 21/32] include checkmate package Former-commit-id: 0fb009a5dd7f6ce792ea20674d8520f18b2ff76a --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index ce4e122..ccc4286 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,6 +9,7 @@ Description: Detecting expansions with paired-end Illumina sequencing data. License: GPL-2 Imports: testit (>= 0.7), + checkmate, data.table (>= 1.10.4-3), stringr (>= 1.2.0), reshape2 (>= 1.4.3), From a0e3b919b0747c7e2efab82bcd432209687ab8c7 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 11:44:48 +0800 Subject: [PATCH 22/32] allow bonferroni.size parameter to actually work Former-commit-id: 68411615a38eba28b5d25e5ba916c8df92148322 --- R/tsum_p_value_summary.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/tsum_p_value_summary.R b/R/tsum_p_value_summary.R index 41b4ed2..99fb6e8 100644 --- a/R/tsum_p_value_summary.R +++ b/R/tsum_p_value_summary.R @@ -28,11 +28,12 @@ tsum_p_value_summary <- function(tsum, if(sum(tsum$stats$p.value < 0, na.rm = TRUE)) { stop("Some p-values appear to be below 0. This may be an exSTRa package bug.") } + assert_number(bonferroni.size, lower = 1, null.ok = TRUE) # output <- data.table(alpha = c(p, 1, NA)) ps <- c(-0.1, p, 1) if(bonferroni) { - ps.bf <- c(-0.1, p / tsum$n_tests, 1) + ps.bf <- c(-0.1, p / ifelse(is.null(bonferroni.size), tsum$n_tests, bonferroni.size), 1) output$bf <- 0L tab <- table(.bincode(tsum$stats$p.value, ps.bf), useNA = "always") output[as.integer(names(tab)), bf := as.integer(tab)] From f545fdd7961866b63a094bd01bd38c9cd6770603 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 12:03:58 +0800 Subject: [PATCH 23/32] moving Tsum_stats to test setup for general use Former-commit-id: 97e1c47e918e988e5463ae4e23113dec338d8e41 --- tests/testthat/setup_tsum_object.R | 6 ++++++ tests/testthat/test_tsum_test.R | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/setup_tsum_object.R diff --git a/tests/testthat/setup_tsum_object.R b/tests/testthat/setup_tsum_object.R new file mode 100644 index 0000000..efa96a0 --- /dev/null +++ b/tests/testthat/setup_tsum_object.R @@ -0,0 +1,6 @@ +context("Building tsum object") + +set.seed(2017) +#exstra_tsum <- tsum_test(exstra_wgs_pcr_2) + +Tsum_stats <- tsum_test(exstra_wgs_pcr_2[c("HD", "SCA6")], give.pvalue = FALSE) diff --git a/tests/testthat/test_tsum_test.R b/tests/testthat/test_tsum_test.R index 33a4ab1..392b595 100644 --- a/tests/testthat/test_tsum_test.R +++ b/tests/testthat/test_tsum_test.R @@ -1,7 +1,5 @@ context("Testing tsum_test() functions") -Tsum_stats <- tsum_test(exstra_wgs_pcr_2[c("HD", "SCA6")], give.pvalue = FALSE) - test_that("tsum_test() may not be giving T statistics", { expect_equal(dim(Tsum_stats$stats)[1], 36L) expect_equal(length(unique(Tsum_stats$stats$locus)), 2L) From b4b84f2f0db890923f355b6b6aaec683fafe77a6 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 12:09:45 +0800 Subject: [PATCH 24/32] get tsum for 4 loci Former-commit-id: 566062f42fdbed08ef54b8215ec3ebb597375ab7 --- tests/testthat/setup_tsum_object.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/setup_tsum_object.R b/tests/testthat/setup_tsum_object.R index efa96a0..c2fd1a1 100644 --- a/tests/testthat/setup_tsum_object.R +++ b/tests/testthat/setup_tsum_object.R @@ -1,6 +1,6 @@ context("Building tsum object") set.seed(2017) -#exstra_tsum <- tsum_test(exstra_wgs_pcr_2) +tsum_4 <- tsum_test(exstra_wgs_pcr_2[c("HD", "SCA6", "SCA1", "FRDA")]) Tsum_stats <- tsum_test(exstra_wgs_pcr_2[c("HD", "SCA6")], give.pvalue = FALSE) From d95a77940a436b0658917f1d446daec4ccd87e1f Mon Sep 17 00:00:00 2001 From: "Rick Tankard (Hypatia)" Date: Fri, 24 May 2019 15:26:46 +0800 Subject: [PATCH 25/32] making tsum_test more fault resistant in parallel Former-commit-id: 6080c3405b210d0039f2420b03e9553d9aa5948b --- R/tsum_test.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/R/tsum_test.R b/R/tsum_test.R index 4e665c8..243cf3d 100644 --- a/R/tsum_test.R +++ b/R/tsum_test.R @@ -169,14 +169,21 @@ tsum_test <- function(strscore, all_loci <- loci(strscore) names(all_loci) <- all_loci # Save copying the whole object each time - strscore_loc_list <- lapply(all_loci, function(loc) {strscore[loc]}) - T_stats_list <- parLapply(cl = cluster, + strscore_loc_list <- lapply(all_loci, function(loc) { strscore[loc]} ) + tsum_statistic_1locus_failsafe <- function(...) { + tryCatch(tsum_statistic_1locus(...), error = function(e) { NULL }) + } + T_stats_list <- parLapplyLB(cl = cluster, strscore_loc_list, - tsum_statistic_1locus, + tsum_statistic_1locus_failsafe, min.quant = min.quant, case_control = case_control, trim = trim, give.pvalue = give.pvalue, B = B, - early_stop = early_stop, early_A = early_A, min_stop = early_stop_min) + early_stop = early_stop, early_A = early_A, min_stop = early_stop_min, + verbose = FALSE) + + # Remove errored runs + T_stats_list <- T_stats_list[!sapply(T_stats_list, is.null)] } else { T_stats_list <- vector('list', length(loci(strscore))) @@ -246,7 +253,8 @@ tsum_statistic_1locus <- function( cluster = NULL, early_stop = TRUE, early_A = 0.25, - min_stop = 50) + min_stop = 50, + verbose = TRUE) { qm <- make_quantiles_matrix(strscore_loc, sample = NULL, @@ -434,7 +442,7 @@ tsum_statistic_1locus <- function( p_smallest <- (sum(sim_T[1L:N_tss] > tsum_max) + 1) / (N_tss + 1) p.value.sd <- p_value_sd_(p_smallest, B_used, N) # if(p_smallest - p.value.sd * 2 > 0.01) { - if(p.value.sd < early_A * p_smallest) { + if(verbose && p.value.sd < early_A * p_smallest) { message(" Reduced replicates to ", B_used, ".") break } From c520e168eccf4f132c91f4432261e8d3a2b85c45 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 16:48:37 +0800 Subject: [PATCH 26/32] use full function name checkmate::assert_number Former-commit-id: 9b89e14c8d1f5d5b8ac548149d78e67f10ebc795 --- R/tsum_p_value_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tsum_p_value_summary.R b/R/tsum_p_value_summary.R index 99fb6e8..77967f3 100644 --- a/R/tsum_p_value_summary.R +++ b/R/tsum_p_value_summary.R @@ -28,7 +28,7 @@ tsum_p_value_summary <- function(tsum, if(sum(tsum$stats$p.value < 0, na.rm = TRUE)) { stop("Some p-values appear to be below 0. This may be an exSTRa package bug.") } - assert_number(bonferroni.size, lower = 1, null.ok = TRUE) + checkmate::assert_number(bonferroni.size, lower = 1, null.ok = TRUE) # output <- data.table(alpha = c(p, 1, NA)) ps <- c(-0.1, p, 1) From c71d80da22b5ffd70bbae4f7b53af4becf8a90df Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 16:49:39 +0800 Subject: [PATCH 27/32] delete the read and delete me file Former-commit-id: 56a0c24b3f6551075abe07a386fb4bbb7f101c42 --- Read-and-delete-me | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 Read-and-delete-me diff --git a/Read-and-delete-me b/Read-and-delete-me deleted file mode 100644 index dcaca43..0000000 --- a/Read-and-delete-me +++ /dev/null @@ -1,8 +0,0 @@ -* Edit the help file skeletons in 'man', possibly combining help files for multiple functions. -* Edit the exports in 'NAMESPACE', and add necessary imports. -* Put any C/C++/Fortran code in 'src'. -* If you have compiled code, add a useDynLib() directive to 'NAMESPACE'. -* Run R CMD build to build the package tarball. -* Run R CMD check to check the package tarball. - -Read "Writing R Extensions" for more information. From 074c2ab4b1014cca88e921a72d970ac2afadf265 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 17:23:30 +0800 Subject: [PATCH 28/32] tsum_statistic_1locus() manual page added verbose option Former-commit-id: 152f1d4815e44c1d053549d1531c25c5e062b0ac --- man/tsum_statistic_1locus.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/tsum_statistic_1locus.Rd b/man/tsum_statistic_1locus.Rd index 6d99aed..109dce2 100644 --- a/man/tsum_statistic_1locus.Rd +++ b/man/tsum_statistic_1locus.Rd @@ -7,7 +7,7 @@ tsum_statistic_1locus(strscore_loc, case_control = FALSE, min.quant = 0, trim = 0, give.pvalue = FALSE, B = 999, parallel = FALSE, cluster = NULL, early_stop = TRUE, - early_A = 0.25, min_stop = 50) + early_A = 0.25, min_stop = 50, verbose = TRUE) } \description{ Private function From 350386b7b1558b57b3b2509e412952e1292df4a4 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Fri, 24 May 2019 17:24:41 +0800 Subject: [PATCH 29/32] Bug fix for plot.exstra_tsum(). Now uses the object `x` given to it, rather than the global `tsum`. Former-commit-id: f6217c51119563a34d4f98a235f8b988ec9f5f52 --- R/CLASS_exstra_tsum.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/CLASS_exstra_tsum.R b/R/CLASS_exstra_tsum.R index 641feb5..57c9584 100644 --- a/R/CLASS_exstra_tsum.R +++ b/R/CLASS_exstra_tsum.R @@ -150,11 +150,11 @@ plot.exstra_tsum <- function(x, loci = NULL, sample_col = NULL, alpha_nonsignif = 0.25, ...) { # check input - assert("tsum should be an exstra_tsum object.", is.exstra_tsum(tsum)) + assert("x should be an exstra_tsum object.", is.exstra_tsum(x)) if(!is.null(loci)) { assert("loci should be a character vector", is.vector(loci), is.character(loci)) # only work on loci we want to plot - tsum <- tsum[loci] + x <- x[loci] } if(!is.null(sample_col)) { assert("sample_col should be a character vector", is.vector(sample_col), @@ -165,7 +165,7 @@ plot.exstra_tsum <- function(x, loci = NULL, sample_col = NULL, # construct colours if(is.null(correction) && is.null(alpha)) { # Use the samples marked as significant - ps <- tsum$stats[identity(signif)] + ps <- x$stats[identity(signif)] } else { if(is.null(correction)) { correction <- "bonferroni" @@ -173,10 +173,10 @@ plot.exstra_tsum <- function(x, loci = NULL, sample_col = NULL, if(is.null(alpha)) { alpha <- 0.05 } - ps <- p_values(tsum, correction = correction, alpha = alpha, only.signif = TRUE) + ps <- p_values(x, correction = correction, alpha = alpha, only.signif = TRUE) } significant_sample_colours <- list() - for(loc in loci(tsum)) { + for(loc in loci(x)) { # TODO this.ps <- ps[loc] if(is.null(sample_col)) { @@ -198,8 +198,8 @@ plot.exstra_tsum <- function(x, loci = NULL, sample_col = NULL, } # Do the plot: - # TODO: as.exstra_score(tsum) - plot_many_str_score(as.exstra_score(tsum), loci = loci, + # TODO: as.exstra_score(x) + plot_many_str_score(as.exstra_score(x), loci = loci, plot_cols = significant_sample_colours, controls_label = controls_label, alpha_control = alpha_nonsignif, ...) From 55d4c6d22f83f6f72801400b28b597acb8dbc6b5 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Mon, 27 May 2019 14:28:10 +0800 Subject: [PATCH 30/32] version bump Former-commit-id: 47228567cd478ff72e128a0a0aeeee0d2694615f --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ccc4286..8a93d18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: exSTRa Type: Package Title: Expanded STR algorithm: detecting expansions in Illumina sequencing data -Version: 0.88.5 +Version: 0.88.6 Date: 2019-01-21 Author: Rick Tankard Maintainer: Rick Tankard From 8975aaace5e44c00c21216aad341a81938531ef8 Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Mon, 27 May 2019 14:33:34 +0800 Subject: [PATCH 31/32] Update tsum_test warning message. Former-commit-id: 7fade63958613c65863528c2d2236feca0e6b277 --- R/tsum_test.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/tsum_test.R b/R/tsum_test.R index b2e3d63..dfac3bf 100644 --- a/R/tsum_test.R +++ b/R/tsum_test.R @@ -99,9 +99,8 @@ tsum_test <- function(strscore, # Warning for parallel usage if(parallel) { warning("Use of tsum_test(parallel = TRUE) may not be beneficial.\n", - " This is due to optimizations in serial code, such that the overhead of parallization\n", - " outweights any benefit. Future implementations may parallize over loci, thus\n", - " significantly reducing overhead.", + " Optimizations in serial code have reduced the benefits of parallization.\n", + " Additionally, often R jobs tend to be idle which has not been resolved.", immediate. = TRUE) } From 50b510b47740aeca2e8f9534105b42f1ff56a0ea Mon Sep 17 00:00:00 2001 From: Rick Tankard Date: Tue, 28 May 2019 10:48:59 +0800 Subject: [PATCH 32/32] Revert "OSX R devel now succeeds, so now this build must pass" This reverts commit bb9df80d306845b0108111b7f7b05ed119dc79fa [formerly 6d5387f7a6f036c86464d8c69dcb83018ace6216]. Former-commit-id: 80e974ed2d58b63f2cb145f34967dedaf76e94e3 --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 1af3714..76ce106 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,9 @@ matrix: - os: windows r: release allow_failures: + # devel on osx fails + - os: osx + r: devel - os: windows r: release