Merge pull request #36 from bahlolab/devel_tankard

exSTRa version 0.87 Former-commit-id: ff3a03b
bahlolab · Jul 28, 2018 · 6d18ffd · 6d18ffd
2 parents 47b45cb + 8633025
commit 6d18ffd
Show file tree

Hide file tree

Showing 9 changed files with 44 additions and 38 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: exSTRa
 Type: Package
 Title: Expanded STR algorithm: detecting expansions in Illumina sequencing data
-Version: 0.86
-Date: 2018-07-13
+Version: 0.87
+Date: 2018-07-28
 Author: Rick Tankard
 Maintainer: Rick Tankard <[email protected]>
 Description: Detecting expansions with paired-end Illumina sequencing data.

diff --git a/R/CLASS_exstra_tsum.R b/R/CLASS_exstra_tsum.R
@@ -122,6 +122,9 @@ print.exstra_tsum <- function(x, ...) {
 #' 
 #' @seealso \code{\link{plot.exstra_score}}
 #' 
+#' @import data.table
+#' @import stringr
+#' @import testit
 #' @export
 plot.exstra_tsum <- function(tsum, loci = NULL, sample_col = NULL, 
   correction = NULL, alpha = NULL, # when NULL, use significance as-is

diff --git a/README.md b/README.md
@@ -33,16 +33,20 @@ Currently makes extensive use of the data.table package, and understanding its u
 
 # Examples
 
-Please see the included `examples/exSTRa_score_analysis.R` script for a example analysis. 
-Other datasets should be analysed in a similar way after processing with the Perl 
+Please see the exSTRa vignette for a worked example; 
+this can be viewed in R after installation of the package with
+```
+vignette("exSTRa")
+```
+or can be downloaded from the repository at [inst/doc/exSTRa.html](inst/doc/exSTRa.html) (on GitHub, the vignette cannot be viewed directly and should be downloaded).
+
+Other datasets in BAM/CRAM format can be analysed in a similar way after processing with the Perl 
 [Bio::STR::exSTRa](https://github.com/bahlolab/Bio-STR-exSTRa) package. 
 
 # Citation
 
-Rick M. Tankard, Martin B. Delatycki, Paul J. Lockhart, 
-         Melanie Bahlo. 
-         **Detecting known repeat expansions with standard protocol next generation 
-         sequencing, towards developing a single screening test for neurological repeat 
-         expansion disorders**. 
-         *bioRxiv* 157792; 
+Rick M. Tankard, Mark F. Bennett, Peter Degorski, Martin B. Delatycki, 
+        Paul J. Lockhart, Melanie Bahlo. 
+         **Detecting tandem repeat expansions in cohorts sequenced with short-read sequencing data**. 
+         *bioRxiv* 157792 (2018);
          doi: https://doi.org/10.1101/157792
diff --git a/examples/exSTRa_score_analysis.R b/examples/exSTRa_score_analysis.R
@@ -73,9 +73,3 @@ plot(tsum, sample_col = plot_cols, correction = "loci")
 ps[identity(signif)]
 # or with the only.signif option:
 p_values(tsum, only.signif = TRUE, correction = "samples")
-
-# Give the best hit(s) for each sample:
-# TODO: what is best for display may not be the best for internal representation. 
-#       For now may keep this as best for display and inspection, while users who wish to 
-#       filter should use the p_values() function on an exstra_tsum object
-# best_hits(tsum)
diff --git a/inst/doc/exSTRa.R b/inst/doc/exSTRa.R
@@ -56,16 +56,20 @@ pie(rep(1, length(plot_cols)), col = plot_cols, labels = names(plot_cols), cex =
 
 ## ---- out.width = '82%', fig.width=12, fig.height=12---------------------
 par(mfrow = c(2, 2))
-plot(tsum, sample_col = plot_cols, correction = "locus")
+plot(tsum, sample_col = plot_cols, correction = "samples")
+
+## ---- out.width = '82%', fig.width=12, fig.height=12---------------------
+par(mfrow = c(2, 2))
+plot(tsum, sample_col = plot_cols, correction = "loci")
 
 ## ------------------------------------------------------------------------
-(ps <- p_values(tsum, correction = "locus"))
+(ps <- p_values(tsum, correction = "samples"))
 
 ## ------------------------------------------------------------------------
 ps[identity(signif)]
 
 ## ------------------------------------------------------------------------
-p_values(tsum, only.signif = TRUE, correction = "locus")
+p_values(tsum, only.signif = TRUE, correction = "samples")
 
 ## ------------------------------------------------------------------------
 exstra_wgs_pcr_2["HD"]

diff --git a/inst/doc/exSTRa.Rmd b/inst/doc/exSTRa.Rmd
@@ -25,12 +25,10 @@ exSTRa supports both whole-genome and whole-exome sequencing (WGS and WES).
 Only paired-end data is supported. 
 This package implements the algorithm as described in:
 
-> Rick M. Tankard, Martin B. Delatycki, Paul J. Lockhart, 
->          Melanie Bahlo. 
->          **Detecting known repeat expansions with standard protocol next generation 
->          sequencing, towards developing a single screening test for neurological repeat 
->          expansion disorders**. 
->          *bioRxiv* 157792; 
+> Rick M. Tankard, Mark F. Bennett, Peter Degorski, Martin B. Delatycki, 
+>         Paul J. Lockhart, Melanie Bahlo. 
+>          **Detecting tandem repeat expansions in cohorts sequenced with short-read sequencing data**. 
+>          *bioRxiv* 157792 (2018);
 >          doi: https://doi.org/10.1101/157792
 
 A table of repeat expansion disorders for the human genome reference hg19 is included.
@@ -211,18 +209,23 @@ with the default number of simulations (9999).
 This can be adjusted with the `B` parameter of `tsum_test()`, or a less stringent
 threshold can be used.
 Bonferroni correction is too severe here, so we can specify Bonferroni correction only 
-on each locus.
+on each locus for the number of samples tested.
 ```{r, out.width = '82%', fig.width=12, fig.height=12}
 par(mfrow = c(2, 2))
-plot(tsum, sample_col = plot_cols, correction = "locus")
+plot(tsum, sample_col = plot_cols, correction = "samples")
 ```
 
+Or Bonferroni correction may be applied for the number of loci tested.
+```{r, out.width = '82%', fig.width=12, fig.height=12}
+par(mfrow = c(2, 2))
+plot(tsum, sample_col = plot_cols, correction = "loci")
+```
 
 You may obtain a data.table of each sample and locus with the p-value, and whether it is significant
 with the correction method applied.
 Here, the correction method is Bonferroni per locus, correcting for the number of samples tested.
 ```{r}
-(ps <- p_values(tsum, correction = "locus"))
+(ps <- p_values(tsum, correction = "samples"))
 ```
 
 To obtain only the significant samples, you can either use data.table subsetting:
@@ -232,7 +235,7 @@ ps[identity(signif)]
 
 or when retrieving the data.table from p_values():
 ```{r}
-p_values(tsum, only.signif = TRUE, correction = "locus")
+p_values(tsum, only.signif = TRUE, correction = "samples")
 ```
 
 

diff --git a/inst/doc/exSTRa.html.REMOVED.git-id b/inst/doc/exSTRa.html.REMOVED.git-id
@@ -1 +1 @@
-7c8ca81187c23abb699bc838fe147803a3723e2a
+ce373b241cba51fca709149b71cb094a702063aa
diff --git a/inst/doc/simpleRepeats.html b/inst/doc/simpleRepeats.html
@@ -12,7 +12,7 @@
 
 <meta name="author" content="Rick Tankard" />
 
-<meta name="date" content="2018-06-06" />
+<meta name="date" content="2018-07-28" />
 
 <title>Using UCSC Simple Repeats</title>
 
@@ -32,7 +32,7 @@
 
 <h1 class="title toc-ignore">Using UCSC Simple Repeats</h1>
 <h4 class="author"><em>Rick Tankard</em></h4>
-<h4 class="date"><em>2018-06-06</em></h4>
+<h4 class="date"><em>2018-07-28</em></h4>
 
 
 

diff --git a/vignettes/exSTRa.Rmd b/vignettes/exSTRa.Rmd
@@ -25,12 +25,10 @@ exSTRa supports both whole-genome and whole-exome sequencing (WGS and WES).
 Only paired-end data is supported. 
 This package implements the algorithm as described in:
 
-> Rick M. Tankard, Martin B. Delatycki, Paul J. Lockhart, 
->          Melanie Bahlo. 
->          **Detecting known repeat expansions with standard protocol next generation 
->          sequencing, towards developing a single screening test for neurological repeat 
->          expansion disorders**. 
->          *bioRxiv* 157792; 
+> Rick M. Tankard, Mark F. Bennett, Peter Degorski, Martin B. Delatycki, 
+>         Paul J. Lockhart, Melanie Bahlo. 
+>          **Detecting tandem repeat expansions in cohorts sequenced with short-read sequencing data**. 
+>          *bioRxiv* 157792 (2018);
 >          doi: https://doi.org/10.1101/157792
 
 A table of repeat expansion disorders for the human genome reference hg19 is included.
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		7c8ca81187c23abb699bc838fe147803a3723e2a
		ce373b241cba51fca709149b71cb094a702063aa