-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f1f6707
commit 9cd8494
Showing
8 changed files
with
126 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,9 @@ The tool provides convenient species identification coupled to quality control m | |
As WGS becomes standard within public health and research laboratories, it is important to harness the high-throughput and high-resolution potential of this technology to provide accurate and rapid at-scale typing of E. coli in public health, clinical and research contexts. | ||
|
||
## Citation | ||
Bessonov, Kyrylo, Chad Laing, James Robertson, Irene Yong, Kim Ziebell, Victor PJ Gannon, Anil Nichani, Gitanjali Arya, John HE Nash, and Sara Christianson. "ECTyper: in silico Escherichia coli serotype and species prediction from raw and assembled whole-genome sequence data." Microbial genomics 7, no. 12 (2021): 000728. [https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000728](https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000728) | ||
If you find `ectyper` useful, please cite the following paper: | ||
|
||
> Bessonov, Kyrylo, Chad Laing, James Robertson, Irene Yong, Kim Ziebell, Victor PJ Gannon, Anil Nichani, Gitanjali Arya, John HE Nash, and Sara Christianson. **"ECTyper: in silico Escherichia coli serotype and species prediction from raw and assembled whole-genome sequence data."** Microbial genomics 7, no. 12 (2021): 000728. [https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000728](https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000728) | ||
## Contact | ||
For any questions, issues or comments, please open a GitHub issue or reach out to [Kyrylo Bessonov]([email protected]). | ||
|
@@ -328,6 +330,7 @@ Some O-antigens display very high degree of homology and are very hard to discer | |
|[Galaxy Europe](https://usegalaxy.eu/root?tool_id=ectyper)| Galaxy public server to execute your analysis from anywhere|Web-based| | ||
|[IRIDA plugin](https://github.com/phac-nml/irida-plugin-ectyper)| IRIDA instances can easily install this additional pipeline|Web-based| | ||
|
||
|
||
# Legal and Compliance Information | ||
|
||
Copyright Government of Canada 2024 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import sys | ||
import pytest | ||
import tempfile | ||
import os | ||
from ectyper import ectyper, definitions | ||
import subprocess | ||
import pandas as pd | ||
import logging | ||
import re | ||
|
||
# Directory containing this test module; test data paths are joined onto it.
TEST_ROOT = os.path.dirname(__file__)

# Dedicated logger for the tests, emitting INFO and above.
LOG = logging.getLogger("TEST")
LOG.setLevel(logging.INFO)
|
||
# Sentinel meaning "the caller did not pass ``output``". Using a sentinel
# (instead of ``None``) keeps an explicit falsy ``output`` meaning "omit -o".
_FRESH_TMPDIR = object()


def set_input(input,
              percent_iden=None,
              verify=True,
              output=_FRESH_TMPDIR,
              cores=1,
              debug=False,
              pathotype=False):
    """
    Create the sys.argv[] without need for commandline input.

    Only ``sys.argv[1:]`` is replaced; ``sys.argv[0]`` is left untouched.

    :param input: Input file given by testing function (passed to ``-i``)
    :param percent_iden: Percent identity for comparison (``-d``); the flag
        is omitted when the value is falsy
    :param verify: When true, add the ``--verify`` flag
    :param output: Location of output (``-o``). When not supplied, a fresh
        temporary directory is created for this call; an explicit falsy
        value omits ``-o`` altogether
    :param cores: Value passed to ``-c``
    :param debug: When true, add the ``--debug`` flag
    :param pathotype: When true, add the ``--pathotype`` flag
    :return: None
    """
    # Create the default output directory per call. A ``tempfile.mkdtemp()``
    # default in the signature is evaluated once at definition time, so every
    # call would silently share (and overwrite) the same directory.
    if output is _FRESH_TMPDIR:
        output = tempfile.mkdtemp()

    args = ['-i', input,
            '-c', str(cores)]

    if percent_iden:
        args += ['-d', str(percent_iden)]
    if verify:
        args += ['--verify']
    if output:
        args += ['-o', output]
    if debug:
        args += ['--debug']
    if pathotype:
        args += ['--pathotype']

    sys.argv[1:] = args
|
||
|
||
def test_single_stx2_subtyping(caplog):
    """
    Run ectyper with --pathotype on Data/EscherichiaO28H5.fasta and check
    that the second line of output.tsv (presumably the first data row)
    mentions both "STEC" and "stx2a".
    """
    caplog.set_level(logging.DEBUG)
    out_dir = tempfile.mkdtemp()
    fasta_path = os.path.join(TEST_ROOT, 'Data/EscherichiaO28H5.fasta')
    set_input(input=fasta_path, output=out_dir, cores=4,
              verify=True, debug=False, pathotype=True)
    ectyper.run_program()

    report_path = os.path.join(out_dir, "output.tsv")
    with open(report_path) as report:
        result_row = list(report)[1]

    for expected in ("STEC", "stx2a"):
        assert expected in result_row
|
||
def test_stx1_stx2_subtyping_pathotyping(caplog):
    """
    Run ectyper with --pathotype and --debug on Data/Escherichia.fna and
    check that the second line of output.tsv contains "EHEC", the combined
    "stx1a;stx2a" entry and the "AP010958.1;AP010958.1" accession pair.
    """
    caplog.set_level(logging.DEBUG)
    out_dir = tempfile.mkdtemp()
    fasta_path = os.path.join(TEST_ROOT, 'Data/Escherichia.fna')
    set_input(input=fasta_path, output=out_dir, cores=4,
              verify=True, debug=True, pathotype=True)
    ectyper.run_program()

    report_path = os.path.join(out_dir, "output.tsv")
    with open(report_path) as report:
        result_row = list(report)[1]

    for expected in ("EHEC", "stx1a;stx2a", "AP010958.1;AP010958.1"):
        assert expected in result_row
|
||
|
||
def test_multi_stx_non_overlap_ranges(caplog):
    """
    Run ectyper with --pathotype on Data/CP041431_STEC316.fasta.gz and check
    that the second line of output.tsv contains "STEC", "stx2e" and "stx2k".
    """
    caplog.set_level(logging.DEBUG)
    out_dir = tempfile.mkdtemp()
    fasta_path = os.path.join(TEST_ROOT, 'Data/CP041431_STEC316.fasta.gz')
    set_input(input=fasta_path, output=out_dir, cores=4,
              verify=True, debug=False, pathotype=True)
    ectyper.run_program()

    report_path = os.path.join(out_dir, "output.tsv")
    with open(report_path) as report:
        result_row = list(report)[1]

    for expected in ("STEC", "stx2e", "stx2k"):
        assert expected in result_row
|
||
def test_multi_stx_non_overlap_different_contigs(caplog):
    """
    Run ectyper with --pathotype on Data/SRR7947260.fasta.gz and check that
    the second line of output.tsv contains "ETEC/STEC", "stx2a", "stx2g" and
    the "contig00064;contig00074" contig pair.
    """
    caplog.set_level(logging.DEBUG)
    out_dir = tempfile.mkdtemp()
    fasta_path = os.path.join(TEST_ROOT, 'Data/SRR7947260.fasta.gz')
    set_input(input=fasta_path, output=out_dir, cores=4,
              verify=True, debug=False, pathotype=True)
    ectyper.run_program()

    report_path = os.path.join(out_dir, "output.tsv")
    with open(report_path) as report:
        result_row = list(report)[1]

    for expected in ("ETEC/STEC", "stx2a", "stx2g", "contig00064;contig00074"):
        assert expected in result_row
|
||
|
||
def test_multi_stx_overlap_same_contig(caplog):
    """
    Run ectyper with --pathotype and --debug on Data/SRR7612273.fasta.gz and
    check that the second line of output.tsv contains "STEC", "stx2a",
    "stx2d" and the "contig00078;contig00078" contig pair.
    """
    caplog.set_level(logging.DEBUG)
    out_dir = tempfile.mkdtemp()
    fasta_path = os.path.join(TEST_ROOT, 'Data/SRR7612273.fasta.gz')
    set_input(input=fasta_path, output=out_dir, cores=4,
              verify=True, debug=True, pathotype=True)
    ectyper.run_program()

    report_path = os.path.join(out_dir, "output.tsv")
    with open(report_path) as report:
        result_row = list(report)[1]

    for expected in ("STEC", "stx2a", "stx2d", "contig00078;contig00078"):
        assert expected in result_row
|