Skip to content

Commit

Permalink
Switch to theta (#17)
Browse files Browse the repository at this point in the history
* global change of psiSite -> theta
* fix typos and documentation
* adapt and speedup examples
* fixing #16
  • Loading branch information
c-mertes authored Oct 21, 2020
1 parent 82f1586 commit 07a82e2
Show file tree
Hide file tree
Showing 34 changed files with 278 additions and 232 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

# ignore tmp files
tmp
inst/igv
inst/tmp*

# ignore vignette artefacts
FraseR.pdf
Expand Down
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ Collate:
AllGenerics-definitions.R
AllGenerics.R
Fraser-pipeline.R
accessor-methods.R
annotationOfRanges.R
beta-binomial-testing.R
calculatePSIValue.R
Expand Down
25 changes: 12 additions & 13 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ asFDS <- function(x){
#' @param object A FraserDataSet object.
#' @param value The new value that should replace the current one.
#' @param x A FraserDataSet object.
#' @param type The psi type (psi3, psi5 or psiSite)
#' @param type The psi type (psi3, psi5 or theta)
#' @return Getter method return the respective current value.
#' @examples
#' fds <- createTestFraserDataSet()
Expand All @@ -47,9 +47,9 @@ asFDS <- function(x){
#' scanBamParam(fds) <- ScanBamParam(mapqFilter=30)
#' nonSplicedReads(fds)
#' rowRanges(fds)
#' rowRanges(fds, type="psiSite")
#' rowRanges(fds, type="theta")
#' mcols(fds, type="psi5")
#' mcols(fds, type="psiSite")
#' mcols(fds, type="theta")
#' seqlevels(fds)
#' seqlevels(mapSeqlevels(fds, style="UCSC"))
#' seqlevels(mapSeqlevels(fds, style="Ensembl"))
Expand Down Expand Up @@ -518,7 +518,7 @@ setReplaceMethod("rowRanges", "FraserDataSet", FRASER.rowRanges.replace)
#' counts(fds, type="psi5", side="ofInterest")
#' counts(fds, type="psi5", side="other")
#' head(K(fds, type="psi3"))
#' head(N(fds, type="psi3"))
#' head(N(fds, type="theta"))
#'
setMethod("counts", "FraserDataSet", function(object, type=NULL,
side=c("ofInterest", "otherSide")){
Expand All @@ -534,15 +534,15 @@ setMethod("counts", "FraserDataSet", function(object, type=NULL,
}

# extract psi value from type
type <- unlist(regmatches(type, gregexpr("psi(3|5|Site)", type, perl=TRUE)))
if(length(type) == 0){
stop(paste0("Please provide a correct psi type: psi5, psi3, and ",
"psiSite. Not the given one: '", type, "'."))
type <- whichPSIType(type)
if(length(type) == 0 | length(type) > 1){
stop(paste0("Please provide a correct psi type: psi5, psi3, or ",
"theta. Not the given one: '", type, "'."))
}
aname <- paste0("rawOtherCounts_", type)
if(!aname %in% assayNames(object)){
stop(paste0("Missing rawOtherCounts for type '", type, "'.",
"Please calculate PSIValues first. And then try again."))
" Please calculate PSIValues first. And then try again."))
}
return(assays(object)[[aname]])
})
Expand All @@ -559,8 +559,7 @@ setReplaceMethod("counts", "FraserDataSet", function(object, type=NULL,
type <- checkReadType(object, type)
aname <- paste0("rawCounts", toupper(type))
} else {
type <- unlist(
regmatches(type, gregexpr("psi(3|5|Site)", type, perl=TRUE)))
type <- whichPSIType(type)
aname <- paste0("rawOtherCounts_", type)
}
assays(object, ...)[[aname]] <- value
Expand Down Expand Up @@ -745,7 +744,7 @@ FRASER.results <- function(object, sampleIDs, fdrCutoff, zscoreCutoff,
#' the gene symbols.
#' @param method The p.adjust method that is being used to adjust p values per
#' sample.
#' @param type Splicing type (psi5, psi3 or psiSite)
#' @param type Splicing type (psi5, psi3 or theta)
#' @param by By default \code{none} which means no grouping. But if
#' \code{sample} or \code{feature} is specified the sum by
#' sample or feature is returned
Expand Down Expand Up @@ -790,7 +789,7 @@ FRASER.results <- function(object, sampleIDs, fdrCutoff, zscoreCutoff,
setMethod("results", "FraserDataSet", function(object,
sampleIDs=samples(object), padjCutoff=0.05,
zScoreCutoff=NA, deltaPsiCutoff=0.3,
minCount=5, psiType=c("psi3", "psi5", "psiSite"),
minCount=5, psiType=c("psi3", "psi5", "theta"),
additionalColumns=NULL, BPPARAM=bpparam(), ...){
FRASER.results(object=object, sampleIDs=sampleIDs, fdrCutoff=padjCutoff,
zscoreCutoff=zScoreCutoff, dPsiCutoff=deltaPsiCutoff,
Expand Down
5 changes: 2 additions & 3 deletions R/Fraser-pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' splicing types.
#' @param implementation The method that should be used to correct for
#' confounders.
#' @param type The type of PSI (psi5, psi3 or psiSite for theta/splicing
#' @param type The type of PSI (psi5, psi3 or theta for theta/splicing
#' efficiency)
#' @param iterations The maximal number of iterations. When the autoencoder has
#' not yet converged after these number of iterations, the fit stops anyway.
Expand All @@ -47,8 +47,7 @@
#' # preprocessing
#' fds <- createTestFraserDataSet()
#'
#' # when running FRASER on a real dataset, one should run the following
#' # two commands first (not run here to make the example run faster):
#' # filtering not expressed introns
#' fds <- calculatePSIValues(fds)
#' fds <- filterExpressionAndVariability(fds)
#'
Expand Down
16 changes: 16 additions & 0 deletions R/FraserDataSet-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,16 @@ FraserDataSet <- function(colData=NULL, junctions=NULL, spliceSites=NULL, ...) {
stop("Please provdie splice site counts if you provide ",
"junction counts.")
}
if(is.character(junctions)){
if(!file.exists(junctions))
stop("Junction file '", junctions, "' does not exists")
junctions <- fread(junctions)
}
if(is.character(spliceSites)){
if(!file.exists(spliceSites))
stop("SpliceSite file '", spliceSites, "' does not exists")
spliceSites <- fread(spliceSites)
}
if(is.data.frame(junctions)){
junctions <- makeGRangesFromDataFrame(junctions,
keep.extra.columns=TRUE)
Expand All @@ -272,6 +282,12 @@ FraserDataSet <- function(colData=NULL, junctions=NULL, spliceSites=NULL, ...) {
spliceSites <- makeGRangesFromDataFrame(spliceSites,
keep.extra.columns=TRUE)
}
if(!is(junctions, "GRanges")){
stop("Provided junction object is not a GRanges object.")
}
if(!is(spliceSites, "GRanges")){
stop("Provided spliceSite object is not a GRanges object.")
}
nsr <- SummarizedExperiment(
rowRanges=spliceSites[,c("spliceSiteID", "type")],
assays=SimpleList(rawCountsSS=as.data.frame(
Expand Down
15 changes: 0 additions & 15 deletions R/accessor-methods.R

This file was deleted.

4 changes: 2 additions & 2 deletions R/annotationOfRanges.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ annotateRanges <- function(fds, feature="hgnc_symbol", featureName=feature,
biotype, useUSCS)

# annotate split reads
for(i in c("psi3", "psiSite")){
for(i in c("psi3", "theta")){
gr <- rowRanges(fds, type=i)
if(any(strand(gr) == "*")){
strand(annotation) <- "*"
Expand Down Expand Up @@ -124,7 +124,7 @@ annotateRangesWithTxDb <- function(fds, feature="SYMBOL",
}
}

for(i in c("psi3", "psiSite")){
for(i in c("psi3", "theta")){
# get GRanges object with the split reads which should be annotated
gr <- rowRanges(fds, type=i)

Expand Down
2 changes: 1 addition & 1 deletion R/beta-binomial-testing.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pvalueByBetaBinomialPerType <- function(fds, aname, psiType, pvalFun,
rawOtherCounts <- counts(fds, type=psiType, side="otherSide")

# swap rawCounts with others for intron retention
if(psiType == "psiSite"){
if(psiType == "theta"){
tmp <- rawCounts
rawCounts <- rawOtherCounts
rawOtherCounts <- tmp
Expand Down
22 changes: 11 additions & 11 deletions R/calculatePSIValue.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#'
#' @inheritParams countRNA
#' @param types A vector with the psi types which should be calculated. Default
#' is all of psi5, psi3 and psiSite.
#' is all of psi5, psi3 and theta.
#' @param overwriteCts FALSE or TRUE (the default) the total counts (aka N) will
#' be recalculated based on the existing junction counts (aka K)
#' @return FraserDataSet
Expand Down Expand Up @@ -185,12 +185,12 @@ calculateSitePSIValue <- function(fds, overwriteCts, BPPARAM){

message(date(), ": Calculate the PSI site values ...")

psiName <- "psiSite"
psiROCName <- "rawOtherCounts_psiSite"
psiName <- "theta"
psiROCName <- "rawOtherCounts_theta"
if(!psiROCName %in% assayNames(fds)){
overwriteCts <- TRUE
}
psiH5datasetName <- "oSite_psiSite"
psiH5datasetName <- "oSite_theta"

# prepare data table for calculating the psi value
countData <- data.table(
Expand All @@ -205,7 +205,7 @@ calculateSitePSIValue <- function(fds, overwriteCts, BPPARAM){
)
)

psiSiteValues <- bplapply(samples(fds), countData=countData, fds=fds,
thetaValues <- bplapply(samples(fds), countData=countData, fds=fds,
BPPARAM=BPPARAM, FUN=function(sample, countData, fds){
if(verbose(fds) > 3){
message("sample: ", sample)
Expand All @@ -214,18 +214,18 @@ calculateSitePSIValue <- function(fds, overwriteCts, BPPARAM){
# get sample
sample <- as.character(sample)

# get counts and psiSite values from cache file if it exists
# get counts and theta values from cache file if it exists
cacheFile <- getOtherCountsCacheFile(sample, fds)
ans <- checkPsiCacheFile(cFile=cacheFile, dName=psiH5datasetName,
overwrite=overwriteCts, ptype="psiSite", fds=fds)
overwrite=overwriteCts, ptype="theta", fds=fds)
if(!is.null(ans)){
return(ans)
}

# add sample specific counts to the data.table
sdata <- data.table(k=c(
rep(K(fds, type="psi3")[,sample], 2),
K(fds, type="psiSite")[,sample]))
K(fds, type="theta")[,sample]))
sdata <- cbind(countData, sdata)
sdata[,os:=sum(k)-k, by="spliceSiteID"]

Expand Down Expand Up @@ -257,15 +257,15 @@ calculateSitePSIValue <- function(fds, overwriteCts, BPPARAM){
HDF5Array(filepath=cacheFile, name=psiH5datasetName)
}
)
names(psiSiteValues) <- samples(fds)
names(thetaValues) <- samples(fds)

# merge it and assign it to our object
assay(fds, type="ss", psiName, withDimnames=FALSE) <- do.call(cbind,
mapply('[', psiSiteValues, TRUE, 2, drop=FALSE,
mapply('[', thetaValues, TRUE, 2, drop=FALSE,
SIMPLIFY=FALSE))
if(isTRUE(overwriteCts)){
assay(fds, type="ss", psiROCName, withDimnames=FALSE) <- do.call(cbind,
mapply('[', psiSiteValues, TRUE, 1, drop=FALSE,
mapply('[', thetaValues, TRUE, 1, drop=FALSE,
SIMPLIFY=FALSE))
}

Expand Down
13 changes: 7 additions & 6 deletions R/countRNAseqData.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,14 @@
#' @param recount if TRUE the cache is ignored and the bam file is recounted.
#' @param genome NULL (default) or a character vector specifying the names of
#' the reference genomes that were used to align the reads for each sample. The
#' names have to be in a way accepted by the \link{getBSgenome} function.
#' Available genomes can be listed using the \link{available.genomes} function
#' from the BSgenome package. If genome is of length 1, the same reference
#' genome will be used for all samples.
#' If \code{genome} is supplied and \code{strandSpecific(fds) == 0L}
#' names have to be in a way accepted by the \code{\link[BSgenome]{getBSgenome}}
#' function. Available genomes can be listed using the
#' \code{\link[BSgenome]{available.genomes}} function from the BSgenome package.
#' If genome is of length 1, the same reference genome will be used for all
#' samples. If \code{genome} is supplied and \code{strandSpecific(fds) == 0L}
#' (unstranded), then the strand information will be estimated by checking the
#' dinucleotides found at the intron boundaries (see ?\link{summarizeJunctions}
#' dinucleotides found at the intron boundaries
#' (see \code{\link[GenomicAlignments]{summarizeJunctions}}
#' in GenomicAlignments package for details). This can e.g. help to avoid
#' ambiguities when adding gene names from a gene annotation to the introns
#' in a later step.
Expand Down
2 changes: 1 addition & 1 deletion R/filterExpression.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ filterVariability <- function(object, minDeltaPsi=0, filter=TRUE,
ctsSE <- K(object, type="ss")
ctsN5 <- N(object, type="psi5")
ctsN3 <- N(object, type="psi3")
ctsNSE <- N(object, type="psiSite")
ctsNSE <- N(object, type="theta")

if(isFALSE(delayed)){
cts <- as.matrix(cts)
Expand Down
10 changes: 5 additions & 5 deletions R/getNSetterFuns.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' the values within the FRASER model.
#'
#' @param fds An FraserDataSet object.
#' @param type The type of psi (psi5, psi3 or psiSite)
#' @param type The type of psi (psi5, psi3 or theta)
#' @param byGroup If TRUE, aggregation by donor/acceptor site will be done.
#' @param dist Distribution for which the p-values should be extracted.
#' @param level Indicates if the retrieved p values should be adjusted on the
Expand Down Expand Up @@ -47,9 +47,9 @@
#' pseudocount()
#'
#' # retrieve or set a mask to exclude certain junctions in the fitting step
#' featureExclusionMask(fds, type="psiSite") <- sample(
#' c(FALSE, TRUE), nrow(mcols(fds, type="psiSite")), replace=TRUE)
#' featureExclusionMask(fds, type="psiSite")
#' featureExclusionMask(fds, type="theta") <- sample(
#' c(FALSE, TRUE), nrow(mcols(fds, type="theta")), replace=TRUE)
#' featureExclusionMask(fds, type="theta")
#'
#' # controlling the verbosity level of the output of some algorithms
#' verbose(fds) <- 2
Expand Down Expand Up @@ -527,7 +527,7 @@ weights <- function(fds, type){
getIndexFromResultTable <- function(fds, resultTable, padj.method="holm"){
type <- as.character(resultTable$type)
target <- makeGRangesFromDataFrame(resultTable)
if(type == "psiSite"){
if(type == "theta"){
gr <- granges(asSE(nonSplicedReads(fds)))
} else {
gr <- granges(asSE(fds))
Expand Down
6 changes: 3 additions & 3 deletions R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ checkReadType <- function(fds, type){
}
type <- unique(type)
stopifnot(isScalarCharacter(type))
correctTypes <- c(psi3="j", psi5="j", psiSite="ss")
correctTypes <- c(psi3="j", psi5="j", theta="ss")

# check if it is already the correct type
if(type %in% correctTypes) return(type)
Expand Down Expand Up @@ -109,7 +109,7 @@ checkReadType <- function(fds, type){
#'
#' @noRd
whichPSIType <- function(type){
unlist(regmatches(type, gregexpr("psi(3|5|Site)", type, perl=TRUE)))
unlist(regmatches(type, gregexpr("psi(3|5)|theta", type, perl=TRUE)))
}

#'
Expand All @@ -120,7 +120,7 @@ whichReadType <- function(fds, name){
stopifnot(isScalarCharacter(name))

# check writing
if(name == "ss" | endsWith(name, "psiSite"))
if(name == "ss" | endsWith(name, "theta"))
return("ss")
if(name == "j" | endsWith(name, "psi5") | endsWith(name, "psi3"))
return("j")
Expand Down
Loading

0 comments on commit 07a82e2

Please sign in to comment.