From c05e9754a9957831b333e5a312a61e6f6a770261 Mon Sep 17 00:00:00 2001 From: David Edwards Date: Thu, 14 May 2020 09:53:48 +1000 Subject: [PATCH 1/2] Update snppar.py --- scripts/snppar.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/snppar.py b/scripts/snppar.py index b448b85..d40d8bc 100644 --- a/scripts/snppar.py +++ b/scripts/snppar.py @@ -15,7 +15,7 @@ snppar -s snps.csv -g genbank.gb -t tree.tre ''' # -# Last modified - 2/1/2020 +# Last modified - 11/5/2020 # Recent Changes: changed default reporting to homoplasic, not parallel # change of some input commands as a result # added user command to log output @@ -24,6 +24,7 @@ # fix for fastml_execute # simplified and intermediate and complex sorting for TreeTime # further fixing (and testing) of fastml_execute +# removed 'cpickle' option for tree.copy(), 'newick' option instead # To add: mapping using tree and snp table only (i.e. no reference) # @@ -42,7 +43,7 @@ from datetime import datetime # Constants declaration -version = 'V0.4.1dev' +version = 'V0.4.2dev' genefeatures = 'CDS' excludefeatures = 'gene,misc_feature,repeat_region,mobile_element' nt = ['A','C','G','T'] @@ -444,7 +445,7 @@ def addToSNPPatterns(snp,snp_pattern,snp_set,alt_set,na_set,tree,snps_to_map,mon def getNANodes(tree, na_set, node_names): removed_nodes = [] - test_tree = tree.copy() + test_tree = tree.copy("newick") if na_set: for isolate in na_set: if test_tree.search_nodes(name=isolate): From 1c64a3e70b71af2b21fb0c13ad359b6bbde5f857 Mon Sep 17 00:00:00 2001 From: David Edwards Date: Thu, 14 May 2020 10:00:28 +1000 Subject: [PATCH 2/2] other version changes --- README.md | 26 +++++++++++++++----------- setup.py | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 9cd8166..2912b0b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ SNPPar is designed to find homoplasic SNPs based on a user-defined phylogenetic By default, SNPPar uses TreeTime for 
ancestral state reconstruction (ASR), but using FastML for ASR is also available if FastML is installed (though much, much slower) -Current Version: V0.4.1dev +Current Version: V0.4.2dev # Home: @@ -88,7 +88,7 @@ Note: If any gene is split in the reference (including across the origin of the [-t TREE] [-g GENBANK] [-E SORTING] [-M MUTATION_EVENTS] [-d DIRECTORY] [-p PREFIX] [-P] [-S] [-C] [-R] [-A] [-a] [-n] [-e] [-u] [-f] [-x FASTML_EXECUTE] - SNPPar: Parallel/homoplasic SNP Finder V0.4.1dev + SNPPar: Parallel/homoplasic SNP Finder V0.4.2dev optional arguments: -h, --help show this help message and exit -s SNPTABLE, --snptable SNPTABLE @@ -151,16 +151,20 @@ Note: If any gene is split in the reference (including across the origin of the # SNPPar sorting Three versions of the SNP sorting are available when using TreeTime for ASR - Filtered out from ASR - complex singletons and monophyletic SNPs - (tested against tree) - intermediate (default) same as complex except SNPs with - missing calls sent to ASR (not singletons) - simple singletons only - + + Filtered out before ASR + + complex singletons and monophyletic SNPs + (tested against tree) + + intermediate (default) same as complex except non-singleton SNPs + with missing calls sent to ASR + + simple singletons only + Complex sorting is the most memory efficient of the three, with simple being about twice as costly (estimate!); intermediate sits somewhere in between (though closer to complex). -Run time is more dependant on missing calls; complex and intermediate sorting are quicker than simple sorting when there are no missing calls. When missing calls are present, complex sorting can be much slower than either simple or intermediate sorting. Intermediate sorting can be faster than simple... (still testing atm) +Run time is more dependent on missing calls; complex and intermediate sorting are quicker than simple sorting when there are no missing calls. 
When missing calls are present, complex sorting can be much slower than either simple or intermediate sorting. Intermediate sorting is typically faster than simple. Complex sorting may be useful when memory is a problem; simple sorting can be used to if you would prefer all the internal SNPs (i.e. non-singletons) to be mapped using ASR. @@ -234,6 +238,6 @@ Then to run SNPPar:

# Important Note -SNPPar is very accurate (evidence in SNPPar_test very soon!), BUT calls where the ancestor is the root node ('N1') are arbituarly assigned - As such, the output trees have no homoplasic events (parallel, convergent, or revertant) mapped to root node, though the total number of SNPs on each branch is estimated using the ratio of the distance to the child nodes of 'N1'. +SNPPar is very accurate, BUT calls where the ancestor is the root node ('N1') are arbitrarily assigned. As such, the output trees have no homoplasic events (parallel, convergent, or revertant) mapped to the root node, though the total number of SNPs on each branch is estimated using the ratio of the distance to the child nodes of 'N1'. When a homoplasic event does occur at the root node and is removed, if there is only one other mutation event at the same SNP position, that mutation event is *not* removed from the tree. Keep this in mind when interpreting the tree output. diff --git a/setup.py b/setup.py index 01874af..3914926 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name='snppar', - version='0.4.1dev', + version='0.4.2dev', author='David Edwards', author_email='David.Edwards@monash.edu', packages=['snppar'],