From b5d3b63016e7a1717888f96b28361ed362cb90e3 Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Fri, 8 Nov 2019 11:24:09 +0100 Subject: [PATCH] added in function to return the mll of the root node of a hierarchy --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/boot_fast_baps.R | 3 ++- R/fix_clusters.R | 2 ++ R/root_mll.R | 35 +++++++++++++++++++++++++++++++++++ fastbaps.Rproj | 1 + man/boot_fast_baps.Rd | 3 ++- man/fix_clusters.Rd | 2 ++ 8 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 R/root_mll.R diff --git a/DESCRIPTION b/DESCRIPTION index 5de1c1b..201973a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: fastbaps Title: A fast genetic clustering algorithm that approximates a Dirichlet Process Mixture model Version: 1.0.0 -Authors@R: person("Gerry", "Tonkin-Hill", email = "gt20@cam.ac.uk", role = c("aut", "cre")) +Authors@R: person("Gerry", "Tonkin-Hill", email = "g.tonkinhill@gmail.com", role = c("aut", "cre")) Description: Takes a multiple sequence alignment as input and clusters according to the 'no-admixture' model. It combines ideas from the Bayesian Hierarchical Clustering algorithm of Heller et al. and hierBAPS to produce a rapid and accurate clustering algorithm. diff --git a/NAMESPACE b/NAMESPACE index 168e988..782edcc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(import_fasta_sparse_nt) export(multi_level_best_baps_partition) export(multi_res_baps) export(optimise_prior) +export(root_mll) export(snp_dist) export(snp_similarity) import(Matrix) diff --git a/R/boot_fast_baps.R b/R/boot_fast_baps.R index 0e25c8a..318eb73 100644 --- a/R/boot_fast_baps.R +++ b/R/boot_fast_baps.R @@ -16,12 +16,13 @@ #' #' @examples #' +#' \dontrun{ #' fasta.file.name <- system.file("extdata", "seqs.fa", package = "fastbaps") #' sparse.data <- import_fasta_sparse_nt(fasta.file.name) #' boot.result <- boot_fast_baps(sparse.data) #' dendro <- as.dendrogram(fast_baps(sparse.data)) #' heatmap(boot.result, dendro, dendro) -#' +#' } #' #' @export boot_fast_baps <- function(sparse.data, k.init=NULL, n.replicates=100, hc.method='ward', diff --git a/R/fix_clusters.R b/R/fix_clusters.R index 5e7bbc8..64d06e7 100644 --- a/R/fix_clusters.R +++ b/R/fix_clusters.R @@ -13,6 +13,7 @@ #' #' @examples #' +#' \dontrun{ #' fasta.file.name <- system.file("extdata", "seqs.fa", package = "fastbaps") #' sparse.data <- import_fasta_sparse_nt(fasta.file.name) #' sim.matrix <- snp_similarity(sparse.data) @@ -20,6 +21,7 @@ #' phylo <- ape::as.phylo(hclust(x, method="average")) #' clusters <- best_baps_partition(sparse.data, phylo) #' clusters <- fix_clusters(sparse.data, clusters) +#' } #' #' @export fix_clusters <- function(sparse.data, clusters, n.iterations=1, quiet=FALSE){ diff --git a/R/root_mll.R b/R/root_mll.R new file mode 100644 index 0000000..cf185ea --- /dev/null +++ b/R/root_mll.R @@ -0,0 +1,35 @@ +#' root_mll +#' +#' Function to calculate the marginal log likelihood of the root of a hierarchy for use in model comparison. +#' +#' @import Matrix +#' +#' +#' @param sparse.data a sparse SNP data object returned from import_fasta_sparse_nt +#' @param h a hclust object representing the hierarchical clustering +#' @param quiet suppress the printing of extra information (default=FALSE) +#' +#' @return the marginal log likelihood of the root node +#' +#' @examples +#' fasta.file.name <- system.file("extdata", "seqs.fa", package = "fastbaps") +#' fasta.file.name <- system.file("extdata", "seqs.fa", package = "fastbaps") +#' sparse.data <- import_fasta_sparse_nt(fasta.file.name) +#' baps.hc <- fast_baps(sparse.data) +#' root_mll(sparse.data, baps.hc) +#' +#' @export +root_mll <- function(sparse.data, h, quiet=FALSE){ + + # Check inputs + if(!is.list(sparse.data)) stop("Invalid value for sparse.data! Did you use the import_fasta_sparse_nt function?") + if(!(class(sparse.data$snp.matrix)=="dgCMatrix")) stop("Invalid value for sparse.data! Did you use the import_fasta_sparse_nt function?") + if(!is.numeric(sparse.data$consensus)) stop("Invalid value for sparse.data! Did you use the import_fasta_sparse_nt function?") + if(!is.matrix(sparse.data$prior)) stop("Invalid value for sparse.data! Did you use the import_fasta_sparse_nt function?") + if(!(class(h) %in% c("hclust", "phylo"))) stop("Invalid value for h! Should be a hclust or phylo object!") + + llks <- tree_llk(sparse.data, h$merge) + root.mll <- llks$ptree[length(llks$ptree)] + + return(root.mll) +} diff --git a/fastbaps.Rproj b/fastbaps.Rproj index cba1b6b..0ee12cd 100644 --- a/fastbaps.Rproj +++ b/fastbaps.Rproj @@ -18,4 +18,5 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --as-cran PackageRoxygenize: rd,collate,namespace diff --git a/man/boot_fast_baps.Rd b/man/boot_fast_baps.Rd index 8cbb344..68b4e16 100644 --- a/man/boot_fast_baps.Rd +++ b/man/boot_fast_baps.Rd @@ -28,11 +28,12 @@ Function to perform bootstrap replicated of fastbaps. } \examples{ +\dontrun{ fasta.file.name <- system.file("extdata", "seqs.fa", package = "fastbaps") sparse.data <- import_fasta_sparse_nt(fasta.file.name) boot.result <- boot_fast_baps(sparse.data) dendro <- as.dendrogram(fast_baps(sparse.data)) heatmap(boot.result, dendro, dendro) - +} } diff --git a/man/fix_clusters.Rd b/man/fix_clusters.Rd index e9b3b70..265c03d 100644 --- a/man/fix_clusters.Rd +++ b/man/fix_clusters.Rd @@ -23,6 +23,7 @@ Function to iteratively update an intially clustering, greedily maximising the B } \examples{ +\dontrun{ fasta.file.name <- system.file("extdata", "seqs.fa", package = "fastbaps") sparse.data <- import_fasta_sparse_nt(fasta.file.name) sim.matrix <- snp_similarity(sparse.data) @@ -30,5 +31,6 @@ x <- as.dist(1-sim.matrix/max(sim.matrix)) phylo <- ape::as.phylo(hclust(x, method="average")) clusters <- best_baps_partition(sparse.data, phylo) clusters <- fix_clusters(sparse.data, clusters) +} }