Skip to content

Commit

Permalink
Random quartet sampling required
Browse files Browse the repository at this point in the history
Still doesn't quite align – why not?
  • Loading branch information
ms609 committed Feb 13, 2025
1 parent 85693aa commit a6ad8f8
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 62 deletions.
133 changes: 77 additions & 56 deletions R/Concordance.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@
#' @family split support functions
#' @export
# TODO support `method = c("split", "sitemean", "minh")`
QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
QuartetConcordance <- function (tree, dataset = NULL, method = "split",
n = 250) {
if (is.null(dataset)) {
warning("Cannot calculate concordance without `dataset`.")
return(NULL)
Expand All @@ -116,7 +117,7 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
dataset <- dataset[tipLabels, drop = FALSE]
characters <- PhyDatToMatrix(dataset, ambigNA = TRUE)

if (method %in% c("minh", "iqtree")) {
if (method %in% c("minh", "minhq", "iqtree")) {
if (attr(tree, "order") != "preorder") {
stop("Tree must be in preorder; try `tree <- Preorder(tree)`")
}
Expand Down Expand Up @@ -162,68 +163,88 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {

cli_progress_bar(name = "Quartet concordance", total = dim(quarters)[[2]])
on.exit(cli_progress_done(.envir = parent.frame(2)))
concDeci <- apply(quarters, 2, function (q) {
#cli_progress_update(1, .envir = parent.frame(2))
quarts <- apply(characters, 2, function (char) {
tab <- table(q, char)
nCol <- dim(tab)[[2]]
if (nCol > 1L) {
# RULES:
# We must pick one leaf per row
# Only parsimony informative are decisive
# To be concordant, leaves 1 and 2 must come from the same column

# Example table:
# q 0 1 2
# A 2 3 0
# B 2 3 2
# C 0 1 2
# D 2 1 2
#

concordant <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
i <- ij[[1]]
j <- ij[[2]]
# Taxa from A and B from column i, C and D from j
prod(tab[1:2, i], tab[3:4, j]) +
# Taxa from A and B from column j, C and D from i
prod(tab[1:2, j], tab[3:4, i])
}))

if (method == "minh") {
# To be parsimony informative, we must pick two leaves from each of
# two columns
if (method == "minh") {
sCF <- apply(quarters, 2, function(abcd) {
resample <- function(x) x[sample.int(length(x), 1)]
leaves <- replicate(n, {
vapply(0:3, function(q) resample(which(abcd == q)), integer(1))
})
cfQ <- apply(leaves, 2, function(q) {
qCh <- `mode<-`(characters[q, , drop = FALSE], "numeric")
concordant <- sum(qCh[1, ] == qCh[2, ] &
qCh[2, ] != qCh[3, ] &
qCh[3, ] == qCh[4, ])
decisive <- dim(qCh)[[2]]
decisive <- sum(colSums(apply(qCh + 1, 2, tabulate, 4) >= 2) > 1)
decisive <- sum(colSums(apply(qCh + 1, 2, tabulate, 4) >= 2) > 0)
concordant / decisive
})
mean(cfQ, na.rm = TRUE)
})
} else {
concDeci <- apply(quarters, 2, function (q) {
#cli_progress_update(1, .envir = parent.frame(2))
quarts <- apply(characters, 2, function (char) {
tab <- table(q, char)
nCol <- dim(tab)[[2]]
if (nCol > 1L) {
# RULES:
# We must pick one leaf per row
# Only parsimony informative are decisive
# To be concordant, leaves 1 and 2 must come from the same column

# Example table:
# q 0 1 2
# A 2 3 0
# B 2 3 2
# C 0 1 2
# D 2 1 2
#

decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
# With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
concordant <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
i <- ij[[1]]
j <- ij[[2]]
sum(apply(combn(4, 2), 2, function(kl) {
# We can pick taxa AB, AC, AD, BC, BD, CD from column i,
# and the other pair from col j
pair1 <- kl
pair2 <- (1:4)[-kl]
prod(tab[pair1, i], tab[pair2, j])
}))
# Taxa from A and B from column i, C and D from j
prod(tab[1:2, i], tab[3:4, j]) +
# Taxa from A and B from column j, C and D from i
prod(tab[1:2, j], tab[3:4, i])
}))
} else {
# IQ-TREE seems to use "variant" in place of "parsimony informative"

nPatterns <- prod(rowSums(tab))
allSame <- sum(apply(tab, 2, prod))

decisive <- nPatterns# - allSame

if (method == "minhq") {
# To be parsimony informative, we must pick two leaves from each of
# two columns


decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
# With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
i <- ij[[1]]
j <- ij[[2]]
sum(apply(combn(4, 2), 2, function(kl) {
# We can pick taxa AB, AC, AD, BC, BD, CD from column i,
# and the other pair from col j
pair1 <- kl
pair2 <- (1:4)[-kl]
prod(tab[pair1, i], tab[pair2, j])
}))
}))
} else {
# IQ-TREE seems to use "variant" in place of "parsimony informative"

nPatterns <- prod(rowSums(tab))
allSame <- sum(apply(tab, 2, prod))

decisive <- nPatterns# - allSame

}
c(concordant, decisive)
} else {
c(0L, 0L)
}
c(concordant, decisive)
} else {
c(0L, 0L)
}
})
rowSums(quarts)
})
rowSums(quarts)
})
concDeci[1, ] / concDeci[2, ]
concDeci[1, ] / concDeci[2, ]
}
} else {
splits <- as.Splits(tree, dataset)
logiSplits <- vapply(seq_along(splits), function (i) as.logical(splits[[i]]),
Expand Down
2 changes: 1 addition & 1 deletion R/data_manipulation.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#'
#' - `informative`: logical specifying which characters contain any
#' phylogenetic information.
#'
#'
#' - `bootstrap`: The character vector
#' \code{c("info.amounts", "split.sizes")}, indicating attributes to sample
#' when bootstrapping the dataset (e.g. in Ratchet searches).
Expand Down
47 changes: 42 additions & 5 deletions tests/testthat/test-Concordance.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_that("_Concordance() handles tip mismatch", {
"No overlap between tree labels and dataset.")
})

test_that("QuartetConcordance(method = minh)", {
test_that("QuartetConcordance(method = minhq)", {
tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0,
Expand All @@ -29,15 +29,15 @@ test_that("QuartetConcordance(method = minh)", {
dimnames = list(letters[1:9], NULL))
dat <- MatrixToPhyDat(mataset)

expect_error(QuartetConcordance(tree, dat, method = "minh"),
expect_error(QuartetConcordance(tree, dat, method = "minhq"),
"must be in preorder")
tree <- Preorder(tree)

# plot(tree); nodelabels();
expect_concordance <- function(i, expectation, iq = "minh") {
expect_concordance <- function(i, expectation, iq = "minhq") {
expect_equal(
QuartetConcordance(tree, dat[, i],
method = ifelse(iq == "iq", "iqtree", "minh")),
method = ifelse(iq == "iq", "iqtree", "minhq")),
setNames(expectation, 11:15))
}
# Expectations computed by working through tables manually
Expand All @@ -52,7 +52,7 @@ test_that("QuartetConcordance(method = minh)", {
expect_concordance(9, c(NaN, 0, 1 / 2, 0, NaN))

# Values calculated from summing results above
expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")),
expect_equal(unname(QuartetConcordance(tree, dat, method = "minhq")),
c(5 + 3 + 1,
0,
12 + 8 + 4 + 1,
Expand All @@ -76,6 +76,43 @@ test_that("QuartetConcordance(method = minh)", {
expect_concordance(iq = "iq", 9, c(0, 0, 1 / 12, 0, 0))
expect_equal(unname(QuartetConcordance(tree, dat, method = "iqtree")),
c(56.7, 0, 85.4, 0, 62.5) / 100, tolerance = 0.01)
})

test_that("QuartetConcordance(method = minh)", {
tree <- Preorder(
ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));"))
mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 2, 2, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0,
1, 0, 0, 0, 0, 1, 1, 1, 0,
0, 0, 0, 0, 1, 1, 2, 2, 0,
0, 0, 1, 1, 2, 2, 3, 3, 0,
0, 1, 2, 3, 0, 1, 2, 3, 0,
0, 1, 2, 0, 0, 2, 2, 3, 0), 9,
dimnames = list(letters[1:9], NULL))
dat <- MatrixToPhyDat(mataset)

# plot(tree); nodelabels();
expect_concordance <- function(i, expectation) {
expect_equal(
QuartetConcordance(tree, dat[, i], method = "minh", n = 1250),
setNames(expectation, 11:15), tolerance = 0.05)
}

# Expectations computed by iq-tree
expect_concordance(1, c(0, 0, 1, 0, 0))
expect_concordance(2, c(1, 0, 0, 0, 0))
expect_concordance(3, c(0.6, 0, 0, 0, 0.5))
expect_concordance(4, c(0, 0, 2 / 3, 0, 0))
expect_concordance(5, c(0, 0, 1 / 3, 0, 3 / 8))
expect_concordance(6, c(0, 0, 0, 0, 0))
expect_concordance(7, c(1 / 5, 0, 0, 0, 0))
expect_concordance(8, rep(0, 5))
expect_concordance(9, c(0, 0, 1 / 12, 0, 0))
expect_equal(tolerance = 0.05,
unname(QuartetConcordance(tree, dat, method = "minh", n = 1234)),
c(56.7, 0, 85.4, 0, 62.5) / 100)

})

Expand Down

0 comments on commit a6ad8f8

Please sign in to comment.