Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ctokheim committed May 17, 2016
2 parents be64eff + 1fc9f9d commit 54a509d
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 54 deletions.
85 changes: 57 additions & 28 deletions R/cancerSeqStudy.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ if ("getopt" %in% rownames(installed.packages())){
'mcores', 'c', 1, 'integer',
'output', 'o', 1, 'character',
'ratioMetric', 'r', 2, 'double',
'driverFrac', 'd', 2, 'double',
'help', 'h', 0, 'logical'
), byrow=TRUE, ncol=4)
opt = getopt(spec)
Expand All @@ -24,7 +25,6 @@ suppressPackageStartupMessages(library(VGAM))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(parallel))


#############################
# Convert a rate and coefficient
# of variation parameter into
Expand All @@ -44,7 +44,6 @@ rateCvToAlphaBeta <- function(rate, cv) {
return(list(alpha=my.alpha, beta=my.beta))
}


#############################
# Analyze power and false positives
# when using a beta-binomial model
Expand All @@ -54,7 +53,7 @@ smgBbdFullAnalysis <- function(mu, cv, Leff, signif.level, effect.size,

# find the power and numer of samples needed for a desired power
powerResult <- smgBbdRequiredSampleSize(desired.power, mu, cv, samp.sizes,
effect.size, signif.level, Leff)
effect.size, signif.level, Leff)
bbd.samp.size.min <- powerResult$samp.size.min
bbd.samp.size.max <- powerResult$samp.size.max
power.result.bbd <- powerResult$power
Expand Down Expand Up @@ -82,12 +81,12 @@ smgBbdFullAnalysis <- function(mu, cv, Leff, signif.level, effect.size,
}


ratiometricBbdFullAnalysis <- function(p, cv, mu, L, signif.level, effect.size,
desired.power, samp.sizes){
ratiometricBbdFullAnalysis <- function(p, cv, mu, Leff, signif.level, effect.size,
desired.power, samp.sizes, Df){

# find the power and numer of samples needed for a desired power
powerResult <- ratiometricBbdRequiredSampleSize(p, cv, desired.power, samp.sizes, mu,
effect.size, signif.level, L)
effect.size, Df, signif.level, Leff)
bbd.samp.size.min <- powerResult$samp.size.min
bbd.samp.size.max <- powerResult$samp.size.max
power.result.bbd <- powerResult$power
Expand All @@ -98,7 +97,7 @@ ratiometricBbdFullAnalysis <- function(p, cv, mu, L, signif.level, effect.size,

# find expected number of false positives
fp.result <- ratiometric.binom.false.pos(params$alpha, params$beta, samp.sizes,
mu, L, signif.level=signif.level)
mu, Leff, signif.level=signif.level)

# save binomial data
tmp.df <- data.frame(sample.size=samp.sizes)
Expand All @@ -109,7 +108,8 @@ ratiometricBbdFullAnalysis <- function(p, cv, mu, L, signif.level, effect.size,
tmp.df['signif.level'] <- signif.level
tmp.df['effect.size'] <- effect.size
tmp.df['mutation.rate'] <- mu
tmp.df["Ratio-metric.P"] <- p
tmp.df["Ratio-metric P"] <- p
tmp.df["Driver fraction"] <- Df
tmp.df["FP"] <- fp.result

return(tmp.df)
Expand Down Expand Up @@ -142,11 +142,11 @@ smgBinomFullAnalysis <- function(mu, Leff, signif.level, effect.size,
return(tmp.df)
}

ratiometricBinomFullAnalysis <- function(p, mu, L, signif.level, effect.size,
desired.power, samp.sizes){
ratiometricBinomFullAnalysis <- function(p, mu, Leff, signif.level, effect.size,
desired.power, samp.sizes, Df){
# calculate power
power.result.binom <- ratiometric.binom.power(p, samp.sizes, mu, L,
signif.level=signif.level,
power.result.binom <- ratiometric.binom.power(p, samp.sizes, mu, Leff=Leff,
Df=Df, signif.level=signif.level,
r=effect.size)
binom.samp.size.min <- samp.sizes[min(which(power.result.binom>=desired.power))]
binom.samp.size.max <- samp.sizes[max(which(power.result.binom<desired.power))+1]
Expand All @@ -160,7 +160,8 @@ ratiometricBinomFullAnalysis <- function(p, mu, L, signif.level, effect.size,
tmp.df['signif.level'] <- signif.level
tmp.df['effect.size'] <- effect.size
tmp.df['mutation.rate'] <- mu
tmp.df["Ratio-metric.P"] <- p
tmp.df["Ratio-metric P"] <- p
tmp.df["Driver fraction"] <- Df
tmp.df["FP"] <- NA

return(tmp.df)
Expand All @@ -186,7 +187,7 @@ runSmgAnalysisList <- function(x, samp.sizes,
}

runRatiometricAnalysisList <- function(x, samp.sizes,
desired.power=.9, L=1500,
Df=1.0, desired.power=.9, Leff=1500*3/4,
possible.cvs=c()){
# unpack the parameters
myp <- x[1]
Expand All @@ -196,7 +197,7 @@ runRatiometricAnalysisList <- function(x, samp.sizes,

# run analysis
result.df <- runRatiometricAnalysis(myp, mymu, myeffect.size, myalpha.level,
samp.sizes, desired.power, L, possible.cvs)
samp.sizes, desired.power, Leff, Df, possible.cvs)

return(result.df)
}
Expand All @@ -205,11 +206,11 @@ runRatiometricAnalysisList <- function(x, samp.sizes,
#' rate, effect.size, and significance level. The purpose of this function is parallelized
#' code running over a list of parameters. If you are not parallelizing, then use the
#' runAnalysis function.
runAnalysisList <- function(x, analysisType="smg", Leff=1500*3/4, L=1500, ...){
runAnalysisList <- function(x, analysisType="smg", Leff=1500*3/4, Df=1.0, ...){
if (analysisType=="smg"){
result <- runSmgAnalysisList(x, Leff=Leff, ...)
} else{
result <- runRatiometricAnalysisList(x, L=L, ...)
result <- runRatiometricAnalysisList(x, Leff=Leff, Df=Df, ...)
}
return(result)
}
Expand All @@ -223,7 +224,7 @@ runSmgAnalysis <- function(mu, effect.size, signif.level,
for (mycv in possible.cvs){
# calculate false positives and power
tmp.df <- smgBbdFullAnalysis(mu, mycv, Leff, signif.level, effect.size,
desired.power, samp.sizes)
desired.power, samp.sizes)
result.df <- rbind(result.df, tmp.df)
}

Expand All @@ -237,29 +238,53 @@ runSmgAnalysis <- function(mu, effect.size, signif.level,
#' Runs the entire power and false positive analysis pipeline.
runRatiometricAnalysis <- function(p, mu, effect.size, signif.level,
samp.sizes, desired.power=.9,
L=1500, possible.cvs=c()){
Leff=1500*3/4, Df=1.0, possible.cvs=c()){
# run beta-binomial model
result.df <- data.frame()
for (mycv in possible.cvs){
# calculate false positives and power
tmp.df <- ratiometricBbdFullAnalysis(p, mycv, mu, L, signif.level, effect.size,
desired.power, samp.sizes)
tmp.df <- ratiometricBbdFullAnalysis(p, mycv, mu, Leff, signif.level, effect.size,
desired.power, samp.sizes, Df)
result.df <- rbind(result.df, tmp.df)
}

# save binomial data
tmp.df <- ratiometricBinomFullAnalysis(p, mu, L, signif.level,
effect.size, desired.power, samp.sizes)
tmp.df <- ratiometricBinomFullAnalysis(p, mu, Leff, signif.level,
effect.size, desired.power,
samp.sizes, Df)
result.df <- rbind(result.df, tmp.df)

return(result.df)
}

#' Utility function to get location of script when
#' executing Rscript.
thisfile <- function() {
cmdArgs = commandArgs(trailingOnly = FALSE)
needle = "--file="
match = grep(needle, cmdArgs)
if (length(match) > 0) {
# Rscript
return(normalizePath(sub(needle, "", cmdArgs[match])))
} else {
ls_vars = ls(sys.frames()[[1]])
if ("fileName" %in% ls_vars) {
# Source'd via RStudio
return(normalizePath(sys.frames()[[1]]$fileName))
} else {
# Source'd via R console
return(normalizePath(sys.frames()[[1]]$ofile))
}
}
}

# Run as a script if arguments provided
if (!is.null(opt$ARGS)){
# load other R files
source("smg.R")
source("ratioMetric.R")
script.path <- thisfile()
script.dir <- dirname(script.path)
source(file.path(script.dir, "smg.R"))
source(file.path(script.dir, "ratioMetric.R"))

#############################
# define the model params
Expand All @@ -285,10 +310,14 @@ if (!is.null(opt$ARGS)){
possible.cvs <- c(.05, .1, .2) # coefficient of variation for mutation rate per base
effect.sizes <- c(.01, .02, .05) # fraction of samples above background
alpha.levels <- c(5e-6) # list for level of significance

# fraction of driver mutations being a certain
# category of interest for ratio-metric methods
driver.frac <- opt$driverFrac

# setting up the sample sizes to check
N <- 25000
by.step <- 25
by.step <- 10
samp.sizes <- seq(by.step, N, by=by.step) # grid of sample sizes to check

##################################
Expand Down Expand Up @@ -316,8 +345,8 @@ if (!is.null(opt$ARGS)){
############################
result.list <- mclapply(param.list, runAnalysisList, mc.cores=opt$mcores,
analysisType=cmdType, samp.sizes=samp.sizes,
desired.power=desired.power,
Leff=Leff, L=L, possible.cvs=possible.cvs)
desired.power=desired.power, Df=driver.frac,
Leff=Leff, possible.cvs=possible.cvs)
result.df <- do.call("rbind", result.list)

# adjust mutation rates back to the average
Expand Down
50 changes: 26 additions & 24 deletions R/ratioMetric.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
suppressPackageStartupMessages(library(VGAM))
suppressPackageStartupMessages(library(reshape2))

##################################
# Power calculatations
Expand All @@ -11,20 +10,22 @@ suppressPackageStartupMessages(library(reshape2))
#' @param p background proportion of total mutations falling into specific category
#' @param N vector of sample sizes
#' @param mu per base rate of mutation
#' @param Df fraction of driver mutations that are the specific one of interest
#' @param Leff gene length in bases
#' @param r effect size for power analysis
#' @param signif.level alpha level for power analysis
#' @return vector containing power for each sample size
ratiometric.binom.power <- function(p, N, mu,
L=1500, r=.02,
ratiometric.binom.power <- function(p, N, mu,
Df=1.0, Leff=1500*3/4, r=.02,
signif.level=5e-6){
# figure out the target mutation rate for effect size is
muEffect <- 1 - ((1-mu)^(L) - r)^(1/L)
muEffect <- 1 - ((1-mu)^(Leff) - r)^(1/Leff)
# Calculate the discrepancy between the background and
# target effect size
muDiff <- muEffect - mu
# given the mutation rates calculate the target effect
# size for a ratio-metric method
pEffect <- (mu*p + muDiff) / muEffect
pEffect <- (mu*p + Df*muDiff) / muEffect

# iterate over the number of samples
power <- c()
Expand All @@ -33,7 +34,7 @@ ratiometric.binom.power <- function(p, N, mu,
# it is expected to occur at least 90% of the time
j <- 1
while(j){
prob <- pbinom(j-1, L*i, muEffect)
prob <- pbinom(j-1, Leff*i, muEffect)
if(prob >= .1){
mutEff <- j
break
Expand Down Expand Up @@ -69,24 +70,25 @@ ratiometric.binom.power <- function(p, N, mu,
#' @param my.beta beta parameter for beta binomial
#' @param N vector of sample sizes
#' @param mu per base rate of mutation
#' @param Leff length of gene in bases
#' @param r effect size for power analysis
#' @param signif.level alpha level for power analysis
#' @return vector containing power for each sample size
ratiometric.bbd.power <- function(my.alpha, my.beta,
N, mu,
L=1500, r=.02,
N, mu, Df=1.0,
Leff=1500*3/4, r=.02,
signif.level=5e-6){
# figure out what the ratio-metric probability is from
# the alpha and beta parameters
p <- my.alpha / (my.alpha + my.beta)
# figure out the target mutation rate for effect size is
muEffect <- 1 - ((1-mu)^(L) - r)^(1/L)
muEffect <- 1 - ((1-mu)^(Leff) - r)^(1/Leff)
# Calculate the discrepancy between the background and
# target effect size
muDiff <- muEffect - mu
# given the mutation rates calculate the target effect
# size for a ratio-metric method
pEffect <- (mu*p + muDiff) / muEffect
pEffect <- (mu*p + Df*muDiff) / muEffect

# iterate over the number of samples
power <- c()
Expand All @@ -95,7 +97,7 @@ ratiometric.bbd.power <- function(my.alpha, my.beta,
# it is expected to occur at least 90% of the time
j <- 1
while(j){
prob <- pbinom(j-1, L*i, muEffect)
prob <- pbinom(j-1, Leff*i, muEffect)
if(prob >= .1){
mutEff <- j
break
Expand Down Expand Up @@ -136,7 +138,7 @@ ratiometric.bbd.power <- function(my.alpha, my.beta,
#' @param num.genes number of genes that are tested
#' @param signif.level alpha level for power analysis
ratiometric.binom.false.pos <- function(my.alpha, my.beta,
N, mu, L=1500,
N, mu, Leff=1500*3/4,
num.genes=18500,
signif.level=5e-6){
# calculate the ratio-metric fraction from alpha and beta
Expand All @@ -157,7 +159,7 @@ ratiometric.binom.false.pos <- function(my.alpha, my.beta,
# }
# j <- j+1
#}
mutEff <- ceiling(L*i*mu)
mutEff <- ceiling(Leff*i*mu)

# step one, find critical threshold
j <- 1
Expand Down Expand Up @@ -196,10 +198,10 @@ ratiometric.binom.false.pos <- function(my.alpha, my.beta,
#' @param L gene length of CDS in bases for an average gene
#' @return List containing the smallest effect size with sufficient power
ratiometricBinomRequiredSampleSize <- function(p, desired.power, possible.samp.sizes, mu,
effect.size, signif.lvl=5e-6, L=1500){
effect.size, Df=1.0, signif.lvl=5e-6, Leff=1500*3/4){
# calculate power
power.result.ratio <- ratiometric.binom.power(p, possible.samp.sizes, mu, L,
signif.level=signif.lvl,
power.result.ratio <- ratiometric.binom.power(p, possible.samp.sizes, mu, Leff,
Df=Df, signif.level=signif.lvl,
r=effect.size)
ratiometric.samp.size.min <- possible.samp.sizes[min(which(power.result.ratio>=desired.power))]
ratiometric.samp.size.max <- possible.samp.sizes[max(which(power.result.ratio<desired.power))+1]
Expand All @@ -226,15 +228,15 @@ ratiometricBinomRequiredSampleSize <- function(p, desired.power, possible.samp.s
#' @param L gene length of CDS in bases for an average gene
#' @return List containing the smallest effect size with sufficient power
ratiometricBbdRequiredSampleSize <- function(p, cv, desired.power, possible.samp.sizes, mu,
effect.size, signif.lvl=5e-6, L=1500){
effect.size, Df=1.0, signif.lvl=5e-6, Leff=1500*3/4){
# get alpha and beta parameterization
# for beta-binomial
params <- rateCvToAlphaBeta(p, cv)

# calculate power
power.result.ratio <- ratiometric.bbd.power(params$alpha, params$beta,
possible.samp.sizes,
mu, L,
mu, Leff, Df=Df,
signif.level=signif.lvl,
r=effect.size)
ratiometric.samp.size.min <- possible.samp.sizes[min(which(power.result.ratio>=desired.power))]
Expand Down Expand Up @@ -266,12 +268,12 @@ ratiometricBbdRequiredSampleSize <- function(p, cv, desired.power, possible.samp
#' @param Leff effective gene length of CDS in bases for an average gene
#' @return List containing the smallest effect size with sufficient power
ratiometricBinomPoweredEffectSize <- function(possible.effect.sizes, desired.power, p, mu,
samp.size, signif.level=5e-6, L=1500) {
samp.size, Df=1.0, signif.level=5e-6, Leff=1500*3/4) {
# calculate the power for each effect size
pow.vec <- c()
for(effect.size in possible.effect.sizes){
pow <- ratiometric.binom.power(p, samp.size, mu, L,
signif.level=signif.level,
pow <- ratiometric.binom.power(p, samp.size, mu, Leff,
Df=Df, signif.level=signif.level,
r=effect.size)
pow.vec <- c(pow.vec, pow)
}
Expand Down Expand Up @@ -302,16 +304,16 @@ ratiometricBinomPoweredEffectSize <- function(possible.effect.sizes, desired.pow
#' @param Leff effective gene length of CDS in bases for an average gene
#' @return List containing the smallest effect size with sufficient power
ratiometricBbdPoweredEffectSize <- function(possible.effect.sizes, desired.power, p, cv, mu,
samp.size, signif.level=5e-6, L=1500) {
samp.size, Df=1.0, signif.level=5e-6, Leff=1500*3/4) {
# figure out alpha/beta for beta-binomial
params <- rateCvToAlphaBeta(p, cv)

# calculate the power for each effect size
pow.vec <- c()
for(effect.size in possible.effect.sizes){
pow <- ratiometric.bbd.power(params$alpha, params$beta,
samp.size, mu, L,
signif.level=signif.level,
samp.size, mu, Leff,
Df=Df, signif.level=signif.level,
r=effect.size)
pow.vec <- c(pow.vec, pow)
}
Expand Down
1 change: 0 additions & 1 deletion R/smg.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
suppressPackageStartupMessages(library(VGAM))
suppressPackageStartupMessages(library(reshape2))

##################################
# Power calculatations
Expand Down
Loading

0 comments on commit 54a509d

Please sign in to comment.