Skip to content

Commit

Permalink
Merge pull request #32 from bahlolab/devel_tankard
Browse files Browse the repository at this point in the history
Documentation update

Former-commit-id: 6725b3f
  • Loading branch information
trickytank authored Jan 17, 2018
2 parents d67a72b + b4c9e48 commit 8b32271
Show file tree
Hide file tree
Showing 23 changed files with 676 additions and 38 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: exSTRa
Type: Package
Title: Expanded STR algorithm: detecting expansions in Illumina sequencing data
Version: 0.83
Date: 2017-11-06
Version: 0.84
Date: 2018-01-17
Author: Rick Tankard
Maintainer: Rick Tankard <[email protected]>
Description: Detecting expansions with paired-end Illumina sequencing data.
Expand Down
115 changes: 105 additions & 10 deletions R/CLASS_exstra_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
#' @import stringr
#' @import testit
#'
#' @title Test if object is an exstra_score object
#'
#' @param x Object to be tested
#'
#' @return Logical
#'
#' @export
is.exstra_score <- function(x) inherits(x, "exstra_score")

Expand Down Expand Up @@ -110,28 +116,102 @@ plot_names.exstra_score <- function(strscore, names) {

# This function allows square brackets to be used to select out the locus and sample
# BIG TODO: always list by locus, throughout the code!!!
#' Extract loci or samples frome exstra_score object
#'
#' Using \code{i} (select) syntax of data.table to extract loci and/or samples.
#' The first index is the loci filter on x$db, and second sample filter on x$samples.
#'
#' @param x exstra_score object
#' @param loc Select loci, using data.table filtering on x$db.
#' @param samp Select samples, using data.table filtering on x$samples.
#'
#' @return exstra_score object
#'
#' @examples
#'
#' # All data:
#' exstra_wgs_pcr_2
#'
#' # Extract one locus:
#' exstra_wgs_pcr_2["HD"]
#'
#' # Extract one sample:
#' exstra_wgs_pcr_2[, "WGSrpt_10"]
#'
#' # Extract all triplet repeats
#' exstra_wgs_pcr_2[unit_length == 3]
#' exstra_wgs_pcr_2[unit_length == 3]$db$locus
#'
#' # Extract all coding repeats
#' exstra_wgs_pcr_2[gene_region == "coding"]
#' exstra_wgs_pcr_2[gene_region == "coding"]$db$locus
#'
#' # Extract all case samples:
#' exstra_wgs_pcr_2[, group == "case"]
#'
#' # Extract by index
#' exstra_wgs_pcr_2[2, 5]
#'
#' @export
`[.exstra_score` <- function(x, loc, samp) {
assert("locus is not the key of x$data", key(x$data)[1] == "locus")
assert("sample is not the key of x$samples", key(x$samples)[1] == "sample")
assert("locus not the key of x$db", key(x$db)[1] == "locus")
if(!missing(loc)) {
x$db <- x$db[eval(substitute(loc))]
x$db <- x$db[eval(substitute(loc)), nomatch=0]
setkey(x$db, locus)
}
if(!missing(samp)) {
x$samples <- x$samples[eval(substitute(samp))]
x$samples <- x$samples[eval(substitute(samp)), nomatch=0]
setkey(x$samples, sample)
}
x$data <- x$data[x$db$locus][sample %in% x$samples$sample]
setkey(x$data, locus, sample)
x
}

#' Plot ECDF curves from an exstra_score object
#'
#'
#' @param rsc exstra_score object.
#' @param loci character; Only plot at the loci specified/
#' No effect if NULL.
#' @param sample_col Specify the colours for the given samples as a named character vector.
#' names() are sample names, while the values are characters specifying colours.
#' If specified, other samples are black by default, but may have an alpha value
#' less than 1 for transparency.
#' @param refline logical; if TRUE, then vertical lines are drawn indicating the reference size.
#' This does not take into account mismatches in the STR, or false repeat hits.
#' @param ylab Label for the y-axis.
#' @param verticals logical; if TRUE, vertical lines are drawn at steps. See \code{\link{plot.stepfun}}.
#' @param pch numeric; the plotting character size.
#' @param xlim numeric of length 2; the x-limits for the plot.
#' @param ylim numeric of length 2; the y-limits for the plot.
#' @param alpha_control Transparency alpha value for the control samples.
#' @param alpha_case Transparency alpha value for the case samples. No effect if NULL.
#' @param xlinked If not "all", limits X chromosome loci to only "male" or "female" samples.
#' Can also filter on only samples with "known" or "missing" sex.
#' @param xlinked.safe logical; if TRUE when xlinked is filtering on "male" or "female",
#' an error will be raised if any samples have an undefined sex.
#' If FALSE, samples with missing sex will also be filtered when xlinked is "male" or "female".
#' @param x_upper_missing If xlim is not specified, then loci with no data will have this
#' upper x-axis limit.
#' @param ... Further arguments to the \code{plot.ecdf} function.
#'
#' @examples
#' plot(exstra_wgs_pcr_2["HD"])
#'
#' plot(exstra_wgs_pcr_2, "HD", sample_col = c("WGSrpt_10" = "red", "WGSrpt_12" = "blue"))
#'
#' # X-linked disorder, show male samples only:
#' plot(exstra_wgs_pcr_2, "SBMA", xlinked = "male")
#'
#' @export
plot.exstra_score <- function(rsc, loci = NULL, sample_col = NULL, refline = TRUE, ylab="Fn(x)", verticals = TRUE,
plot.exstra_score <- function(rsc, loci = NULL, sample_col = NULL,
refline = TRUE, ylab="Fn(x)", verticals = TRUE,
pch = 19, xlim, ylim = c(0,1), alpha_control = 0.5, alpha_case = NULL,
xlinked = "all", xlinked.safe = TRUE, x_upper_missing = 150, ...) {
xlinked = "all", xlinked.safe = TRUE, x_upper_missing = 150,
main = NULL, ...) {
# Plot ECDFs of rep score data
# sample_col should be a named vector, sample names as the name and color as the value
# refline: if TRUE, include reference
Expand All @@ -153,10 +233,16 @@ plot.exstra_score <- function(rsc, loci = NULL, sample_col = NULL, refline = TRU
for(locus.name in strlocis) {
#strrir.trim <- trim.rep_in_read_data(strrir, trimming)
for(xlinked in xlinked_loop) {
main.title <- paste(loci_text_info(rsc, locus.name), "score ECDF")
if(xlinked != "all" && grepl("X", str_score_fil$db[locus.name]$Gene.location)) {
plot_data <- str_filter_sex(rsc, xlinked, xlinked.safe)$data[locus == locus.name]
main.title <- paste(main.title, paste0(xlinked, 's'))
if(is.null(main)) {
main.title <- paste(loci_text_info(rsc, locus.name), "score ECDF")
} else {
main.title <- main
}
if(xlinked != "all" && grepl("X", rsc$db[locus.name]$inheritance)) {
plot_data <- filter_sex(rsc, xlinked, xlinked.safe)$data[locus == locus.name]
if(is.null(main)) {
main.title <- paste(main.title, paste0(xlinked, 's'))
}
} else {
plot_data <- rsc$data[locus.name, nomatch = 0]
}
Expand Down Expand Up @@ -203,7 +289,10 @@ plot.exstra_score <- function(rsc, loci = NULL, sample_col = NULL, refline = TRU
# TODO: easy renaming of samples


#' Copy an exstra_score object so referencing does not cause problems.
#' Copy an exstra_score object.
#'
#' Prevents changing both objects on changes by reference, that do not copy on write.
#'
#' @export
copy.exstra_score <- function(x) {
x$data %<>% copy()
Expand All @@ -214,7 +303,8 @@ copy.exstra_score <- function(x) {
}


#' Length of an exstra_score object
#' Number of data points in exstra_score object
#'
#' @export
length.exstra_score <- function(x) {
x$data[, .N]
Expand All @@ -227,6 +317,11 @@ length.exstra_score <- function(x) {
}

#' Dimension of exstra_score object
#'
#' @param x An exstra_score object.
#'
#' @return Vector of length two, the number of loci and number of samples respectively.
#'
#' @export
dim.exstra_score <- function(x) {
c(exstra_wgs_pcr_2$db[, .N], exstra_wgs_pcr_2$samples[, .N])
Expand Down
64 changes: 58 additions & 6 deletions R/CLASS_exstra_tsum.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#' Checks for the class attribute only.
#' Does not check for correctness.
#'
#' @param x Object to be tested
#'
#' @return Logical
#'
#' @import data.table
#' @import stringr
#' @import testit
Expand Down Expand Up @@ -108,9 +112,15 @@ print.exstra_tsum <- function(x, ...) {
#' @param tsum An exstra_tsum object.
#' @param loci Character vector of locus or loci to plot.
#' @param sample_col Named (samples) vector of charcters defining colors.
#' @param ... further arguments to plot.exstra_score
#' @param correction Apply this correction method from \code{\link{p_values}}.
#' If NULL, then the correction already applied to tsum is used.
#' @param alpha Significance level.
#' If NULL, then the significance already applied to tsum is used. (default 0.05)
#' @param controls_label Generic label for all control samples.
#' @param alpha_nonsignif Transparency alpha value for nonsignificant samples.
#' @param ... further arguments to \code{\link{plot.exstra_score}}
#'
#' @seealso plot.exstra_score
#' @seealso \code{\link{plot.exstra_score}}
#'
#' @export
plot.exstra_tsum <- function(tsum, loci = NULL, sample_col = NULL,
Expand Down Expand Up @@ -176,11 +186,51 @@ plot.exstra_tsum <- function(tsum, loci = NULL, sample_col = NULL,
# TODO
}

#TODO:
# brackets [, ]
#' Extract loci or samples from exstra_tsum object
#'
#' Using \code{i} (select) syntax of data.table to extract loci and/or samples.
#' The first index is the loci filter on x$db, and second sample filter on x$samples.
#'
#' @param x exstra_score object
#' @param loc Select loci, using data.table filtering on x$db.
#' @param samp Select samples, using data.table filtering on x$samples.
#'
#' @return exstra_tsum object
#'
#' @examples
#' # Run tsum_test()
#' (tsum <- tsum_test(exstra_wgs_pcr_2[c("HD", "SCA2", "SCA6", "FRDA")], B = 100))
#'
#' # All data:
#' tsum
#'
#' # Extract one locus:
#' tsum["HD"]
#'
#' # Extract one sample:
#' tsum[, "WGSrpt_10"]
#'
#' # One sample at one locus:
#' tsum["HD", "WGSrpt_10"]
#'
#' # Extract by index
#' tsum[2, 5]
#'
#' # The following are intended to work, but do not at present:
#' # Extract all triplet repeats
#' ## tsum[unit_length == 3]
#' ## tsum[unit_length == 3]$db$locus
#'
#' # Extract all coding repeats
#' ## tsum[gene_region == "coding"]
#' ## tsum[gene_region == "coding"]$db$locus
#'
#' # Extract all case samples:
#' ## tsum[, group == "case"]
#'
#' @export
`[.exstra_tsum` <- function(x, loc, samp) {
assert("locus is not the key of x$T", key(x$T)[1] == "locus")
assert("locus is not the key of x$db", key(x$db)[1] == "locus")
# recycle code for exstra_score:
if(missing(loc)) {
if(!missing(samp)) {
Expand All @@ -194,7 +244,7 @@ plot.exstra_tsum <- function(tsum, loci = NULL, sample_col = NULL,
}
}
# cut class specific loci
x$stats <- x$stats[x$db$locus][sample %in% x$samples$sample]
x$stats <- x$stats[x$db$locus, nomatch=0][sample %in% x$samples$sample, nomatch=0]
# matrix cut. We do not attempt to filter samples here as it is more complicated, and
# this is for diagnostics mostly.
x$qmats <- x$qmats[x$db$locus]
Expand All @@ -205,6 +255,8 @@ plot.exstra_tsum <- function(tsum, loci = NULL, sample_col = NULL,

#' Copy an exstra_tsum object
#'
#' Prevents changing both objects on changes by reference, that do not copy on write.
#'
#' @export
copy.exstra_tsum <- function(x) {
x <- copy.exstra_score(x)
Expand Down
35 changes: 31 additions & 4 deletions R/plot_multi.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,42 @@
#' Multiple STR score ECDF plots
#'
#' Allows plotting the STR scores of many loci, including direct to files.
#' Allows plotting the STR scores of many loci with legend.
#' Plots may be written directly to PDF files.
#'
#' @param strscore exstra_score object.
#' @param prefix File name prefix.
#' @param dir Output directory for PDF plots.
#' @param plot_cols If a named vector, colours to use for each sample (names) for all loci.
#' Unspecified samples are treated as controls.
#' May be a named (loci) list with named vectors as above for specific
#' locus colours.
#' @param color_only List (named with loci) of a character vectors of samples to color at each locus.
#' @param plot_types vector; Specify the plot output device. May have multiple of the
#' following options if these are desired.
#' Set to: 1 for current device,
#' 2 for output to a single PDF file with multiple pages (when >1 locus), and
#' 3 for a PDF file per locus.
#' @param alpha_control Transparency alpha value for control samples.
#' @param alpha_case Transparency alpha value for case samples.
#' @param legend logical; if TRUE, include a legend.
#' @param legend_control logical; if TRUE, include generic control specification in legend.
#' @param controls_label character; a generic label in the legend for controls.
#' @param cases_label character; a generic label for case samples in the legend.
#' @param custom_legend A named (sample) vector of colors, for the legend.
#' @param ... Further arguments to \code{\link{plot.exstra_score}}
#'
#' @examples
#' # Plot 4 loci
#' par(mfrow = c(2, 2))
#' plot_multi(exstra_wgs_pcr_2[c("HD", "SCA6", "FRDA", "SCA1")], alpha_case = 0.2)
#'
#' @export
plot_multi <- function(strscore,
prefix = "exSTRa_plot",
dir = "images/",
plot_cols = NULL, # list of the color of samples of each locus. Or just a named vector of colours to use for all loci.
color_only = NULL, # list (name = locus) of samples to color at each locus
plottypes = 1, # 1 to current device, 2 to a single PDF file with multiple pages, 3 to individual PDF files per locus.
plot_types = 1, # 1 to current device, 2 to a single PDF file with multiple pages, 3 to individual PDF files per locus.
alpha_control = 0.5,
alpha_case = NULL,
legend = TRUE,
Expand All @@ -21,14 +48,14 @@ plot_multi <- function(strscore,
) {

# plot_many_str_score(mixed_cohorts_wgs, "STR-score-WGS-01-to-03", plot_cols,
# plottypes = 3, alpha_case = 0.5, alpha_control = 0.3, legend = FALSE)
# plot_types = 3, alpha_case = 0.5, alpha_control = 0.3, legend = FALSE)

plot_many_str_score(strscore,
typename = prefix,
plot_cols,
loci = NULL,
color_only = color_only,
plottypes = plottypes,
plot_types = plot_types,
dirbase = paste0(dir, "/"),
alpha_control = alpha_control,
alpha_case = alpha_case,
Expand Down
10 changes: 5 additions & 5 deletions R/private_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ remove_below_quant <- function(loc_data, quant = 0.5) {
}

plot_many_str_score <- function(strscore, typename, plot_cols, loci = NULL,
color_only = NULL, plottypes = 1, dirbase = "images/",
color_only = NULL, plot_types = 1, dirbase = "images/",
alpha_control = 0.5, alpha_case = NULL,
legend = TRUE, legend_control = TRUE, controls_label = "controls",
cases_label = NULL, custom_legend = NULL,
...) {
# typename a non-empty character string
# plottypes should be vector of 1 to 3 can be 1:3
# plot_types should be vector of 1 to 3 can be 1:3
# plot_cols may be a list, named by loci
# color_only is a list, each item indicating the items to be coloured
#TODO: check that sample names are correct
# legend_custom, a named vector of colors for the legend
if(any(is.element(plottypes, 2:3))) {
if(any(is.element(plot_types, 2:3))) {
# dir.create(paste0(dirbase, typename), recursive = TRUE)
# The version below is more intuitive for other users
dir.create(dirbase, recursive = TRUE)
Expand All @@ -48,7 +48,7 @@ plot_many_str_score <- function(strscore, typename, plot_cols, loci = NULL,
if(is.null(loci)) {
loci <- loci(strscore)
}
for(i in plottypes) {
for(i in plot_types) {
if(i == 2) { pdf(paste0(dirbase, typename, ".pdf"), useDingbats=FALSE) }
for(loc in loci) {
if(i == 3) {
Expand Down Expand Up @@ -253,7 +253,7 @@ analyse_str_score_mw <- function(strscore, plot_cols, filebase = c(), actual = N
# ECDF plots
#TODO: fix up for significance.threshold
plot_many_str_score(strscore, paste0("STR-score-", filebase, bf_name), plot_cols,
color_only = str_significant(score_p, significance.threshold), plottypes = 3, dirbase = dirbase)
color_only = str_significant(score_p, significance.threshold), plot_types = 3, dirbase = dirbase)

# Histograms
dirbase_ext <- paste0(dirbase, "STR-score-", filebase, "-histograms/")
Expand Down
Loading

0 comments on commit 8b32271

Please sign in to comment.