diff --git a/R/import_fasta_sparse_nt.R b/R/import_fasta_sparse_nt.R index 889fe48..493c668 100644 --- a/R/import_fasta_sparse_nt.R +++ b/R/import_fasta_sparse_nt.R @@ -35,7 +35,14 @@ import_fasta_sparse_nt <- function(fasta, prior='baps', check.fasta=TRUE){ if(class(fasta)=="DNAbin"){ fasta <- as.character(as.matrix(fasta)) - ij <- which(t(fasta) != fasta[1,], arr.ind = TRUE) + seqnames <- rownames(fasta) + + cons_ref <- c(a=0,c=1,g=2,t=3,`-`=5,`n`=5) + cosensus <- apply(fasta[2:nrow(fasta),,drop=FALSE], 2, function(x){ + tbl <- table(x) + cons_ref[names(tbl)[which.max(tbl)]] + }) + fasta[fasta=='a'] <- 1 fasta[fasta=='c'] <- 2 fasta[fasta=='g'] <- 3 @@ -43,14 +50,16 @@ import_fasta_sparse_nt <- function(fasta, prior='baps', check.fasta=TRUE){ fasta[fasta=='-'] <- 5 fasta[fasta=='n'] <- 5 fasta <- apply(fasta, 2, as.numeric) + + ij <- which(t(fasta) != (cosensus+1), arr.ind = TRUE) snp.data <- list(num.seqs=nrow(fasta), - consensus=fasta[1,]-1, + consensus=cosensus, seq.length=ncol(fasta), seq.names=rownames(fasta)) snp.matrix <- t(sparseMatrix(i=ij[,1], j=ij[,2], x=t(fasta)[ij], dims = c(snp.data$seq.length, snp.data$num.seqs), - dimnames = list(1:snp.data$seq.length, snp.data$seq.names))) + dimnames = list(1:snp.data$seq.length, seqnames))) } else { snp.data <- import_fasta_to_vector_each_nt(fasta) diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..32b47ad --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,17 @@ +citHeader("The best way to cite fastbaps in publications is to use:") + +citEntry(entry = "Article", +title = "Fast hierarchical Bayesian analysis of population structure", +author = personList(as.person("Gerry Tonkin-Hill"), + as.person("John A Lees"), + as.person("Stephen D Bentley"), + as.person("Simon D W Frost"), + as.person("Jukka Corander")), +journal = "Nucleic Acids Res..", +year = "2019", +pages = "1362--4962", +url = "http://dx.doi.org/10.1093/nar/gkz361", + +textVersion = "Tonkin-Hill,G., Lees,J.A., Bentley,S.D., Frost,S.D.W. and Corander,J. (2019) Fast hierarchical Bayesian analysis of population structure. Nucleic Acids Res., 10.1093/nar/gkz361." + + diff --git a/inst/vignette-supp/unnamed-chunk-17-1.png b/inst/vignette-supp/unnamed-chunk-17-1.png index 5183c8a..e3a0a29 100644 Binary files a/inst/vignette-supp/unnamed-chunk-17-1.png and b/inst/vignette-supp/unnamed-chunk-17-1.png differ