From 998524460c43138ca5124e4ba57b30d99803bf95 Mon Sep 17 00:00:00 2001
From: Remco Bouckaert
Date: Tue, 12 May 2015 08:18:32 +1200
Subject: [PATCH] add approx likelihood

---
 src/snap/Data.java                                 |   2 +-
 .../BetaApproximationLikelihood.java               | 239 ++++++++++++++++++
 2 files changed, 240 insertions(+), 1 deletion(-)
 create mode 100644 src/snap/likelihood/BetaApproximationLikelihood.java

diff --git a/src/snap/Data.java b/src/snap/Data.java
index f2e1d64..63fafbf 100644
--- a/src/snap/Data.java
+++ b/src/snap/Data.java
@@ -510,7 +510,7 @@ public double getProportionZeros() {
 			int w = getPatternWeight(i);
 			for (int j = 0; j < p.length; j++) {
 				oneCount += w * p[j];
-				zeroCount += w * (n[j] - p[j]);
+				zeroCount += w * (n[j] - 1 - p[j]);
 			}
 		}
 		return (double) zeroCount / ((double) zeroCount + oneCount);
diff --git a/src/snap/likelihood/BetaApproximationLikelihood.java b/src/snap/likelihood/BetaApproximationLikelihood.java
new file mode 100644
index 0000000..b52fa1d
--- /dev/null
+++ b/src/snap/likelihood/BetaApproximationLikelihood.java
@@ -0,0 +1,239 @@
+package snap.likelihood;
+
+
+import snap.Data;
+import beast.core.Citation;
+import beast.core.Description;
+import beast.evolution.alignment.Alignment;
+import beast.evolution.branchratemodel.BranchRateModel;
+import beast.evolution.branchratemodel.StrictClockModel;
+import beast.evolution.likelihood.GenericTreeLikelihood;
+import beast.evolution.sitemodel.SiteModel;
+import beast.evolution.tree.Node;
+import beast.evolution.tree.Tree;
+import beast.math.Binomial;
+import beast.math.GammaFunction;
+
+import org.apache.commons.math.distribution.BetaDistribution;
+
+@Description("Implementation of Siren et al. Beta SNP approximation using Hiscott et al. integrator")
+@Citation("")
+public class BetaApproximationLikelihood extends GenericTreeLikelihood {
+
+
+	Tree tree;
+	Double rootTheta; //Parameter for root distribution.
+	Data data;
+
+	int nPoints; //Size of mesh used in integration
+	int thisPattern;
+	protected double[][] partialIntegral; //Partial likelihoods. First index = point number, second index = node (allocated as [nPoints][nNodes]).
+	protected int[] lineageCounts; //Lineage counts for each leaf taxon, for current pattern.
+	protected int[] redCounts; //Red allele counts for each leaf, for current pattern.
+	protected double[] patternLogLikelihoods;
+
+	/**
+	 * flag to indicate the dirtiness state of nodes:
+	 * // when CLEAN=0, nothing needs to be recalculated for the node
+	 * // when DIRTY=1 indicates a node partial needs to be recalculated
+	 * // when FILTHY=2 indicates the indices for the node need to be recalculated
+	 * // (often not necessary while node partial recalculation is required)
+	 */
+	protected int hasDirt;
+
+	/**
+	 * Lengths of the branches in the tree associated with each of the nodes
+	 * in the tree through their node numbers. By comparing whether the
+	 * current branch length differs from stored branch lengths, it is tested
+	 * whether a node is dirty and needs to be recomputed (there may be other
+	 * reasons as well...).
+	 * These lengths take branch rate models into account.
+	 */
+	protected double[] m_branchLengths;
+	protected double[] storedBranchLengths;
+
+
+	@Override
+	public void initAndValidate() throws Exception {
+		tree = (Tree) treeInput.get();
+		data = dataInput.get();
+		patternLogLikelihoods = new double[data.getPatternCount()];
+
+		nPoints = 20; //TODO: read this from the xml
+		int nNodes = tree.getNodeCount();
+		partialIntegral = new double[nPoints][nNodes];
+		rootTheta = 0.5; //We should get this from the PARAMETERS.
+
+		hasDirt = Tree.IS_FILTHY;
+	}
+
+
+	/**
+	 * Calculate the log likelihood of the current state.
+	 *
+	 * @return the log likelihood.
+	 */
+	@Override
+	public double calculateLogP() {
+
+
+		double logL=0.0;
+
+		for (int thisPattern = 0; thisPattern < data.getPatternCount(); thisPattern++) {
+			redCounts = data.getPattern(thisPattern);
+			lineageCounts = data.getPatternLineagCounts(thisPattern);
+			traverse(tree.getRoot()); //Compute likelihood and puts value in patternLogLikelihoods[thisPattern]
+
+			logL += patternLogLikelihoods[thisPattern] * data.getPatternWeight(thisPattern);
+		}
+		return logL;
+	}
+
+
+
+	/**
+	 * Evaluate a Simpson's type rule on the interval x[0]...x[n-1], where f[i] = f(x[i])
+	 * @param x array of x values. Assumed to be in strictly increasing order, but need not be regular.
+	 * @param f array of f values with same length as x.
+	 * @return Simpson's estimate of integral of f between x[0] and x[n-1].
+	 * If the x[i] are regular and n is even then this returns the standard Simpson's estimate. Otherwise, it
+	 * divides the intervals into consecutive pairs. For each pair [a,c][c,b] it determines the integral
+	 * which is exact for all quadratics (as in Simpson's).
+	 * If n is odd then the last interval is evaluated using the same rule but to evaluate the
+	 * last integral using the final three points.
+	 *
+	 */
+	double simpsons(double[] x, double[] f) {
+		int n = x.length;
+		double total = 0.0;
+		for (int i = 0;i