Skip to content

Commit

Permalink
idmap file fmt + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Sann5 committed May 23, 2024
1 parent 0d729a3 commit 571d35f
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 8 deletions.
32 changes: 31 additions & 1 deletion q2_types/reference_db/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,36 @@ def _validate_(self, level):
pass


class HmmerIdmapFileFmt(model.TextFileFormat):
    """
    Text format of a HMMER ``.hmm.idmap`` file.

    Every line holds an index and an upper-case alphanumeric code
    separated by a single space (e.g. ``1 1FKAT``). The index of each
    line must equal its 1-based line number.
    """

    def _validate_(self, level):
        """
        Validate the leading lines of the idmap file.

        ``level`` selects how many lines are checked: at most 100 for
        "min" and at most 10,000,000 for "max". A ``ValidationError`` is
        raised for the first checked line that is not of the form
        ``<index> <CODE>`` or whose index differs from its line number.
        """
        # Set the number of rows to be parsed
        max_lines = {"min": 100, "max": 10000000}[level]
        # Compiled once, outside the loop. "$" also matches right before
        # the trailing newline, so lines do not need to be stripped first.
        line_pattern = re.compile(r'^(\d+) ([A-Z0-9]+)$')

        with open(str(self), 'r') as file:
            # Iterate over the handle lazily rather than calling
            # readlines(): only the lines that are actually validated get
            # read, instead of loading the whole file into memory.
            for i, line in enumerate(file, 1):
                # Check number of lines parsed so far
                if i > max_lines:
                    break

                # Validate line
                match = line_pattern.match(line)
                if match is None:
                    raise ValidationError(
                        f"Invalid line {i}.\n"
                        f"{line} \n"
                        "Expected index and an alphanumeric code separated "
                        "by a single space."
                    )

                # Check index is equal to line number
                idx = match.group(1)
                if idx != str(i):
                    raise ValidationError(
                        f"Invalid line {i}.\n"
                        f"{line} \n"
                        f"Expected index {i} but got {idx} instead.\n"
                    )


class HmmerDirFmt(model.DirectoryFormat):
"""
The <hmmfile>.h3m file contains the profile HMMs
Expand All @@ -320,7 +350,7 @@ class HmmerDirFmt(model.DirectoryFormat):
h3i = model.File(r'.*\.hmm\.h3i', format=HmmerBinaryFileFmt)
h3f = model.File(r'.*\.hmm\.h3f', format=HmmerBinaryFileFmt)
h3p = model.File(r'.*\.hmm\.h3p', format=HmmerBinaryFileFmt)
idmap = model.File(r'.*\.hmm\.idmap', format=HmmerBinaryFileFmt)
idmap = model.File(r'.*\.hmm\.idmap', format=HmmerIdmapFileFmt)
fasta_files = model.FileCollection(
r'.*\.(fa|fasta|faa)$',
format=ProteinFASTAFormat,
Expand Down
19 changes: 19 additions & 0 deletions q2_types/reference_db/tests/data/hmmer/bacteria/bacteria.hmm.idmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
1 1FKAT
2 1FIZK
3 1FIY1
4 1FKA5
5 1FIYP
6 1FK7D
7 1FIX5
8 1FKCK
9 1FIXT
10 1FKBX
11 1FIYG
12 1FKAC
13 1FKB9
14 1FK72
15 1FK4H
16 1FK7S
17 1FK66
18 1FK6W
19 1FIXC
19 changes: 19 additions & 0 deletions q2_types/reference_db/tests/data/hmmer/invalid_idmaps/1.hmm.idmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
1 1FKAT:"%#@
2 1FIZK
3 1FIY1
4 1FKA5
5 1FIYP
6 1FK7D
7 1FIX5
8 1FKCK
9 1FIXT
10 1FKBX
11 1FIYG
12 1FKAC
13 1FKB9
14 1FK72
15 1FK4H
16 1FK7S
17 1FK66
18 1FK6W
19 1FIXC
19 changes: 19 additions & 0 deletions q2_types/reference_db/tests/data/hmmer/invalid_idmaps/2.hmm.idmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
1 1FKAT
2 1FIZK
3 1FIY1
4 1FKA5
5 1FIYP
6 1FK7D
7 1FIX5
8 1FKCK
9 1FIXT
10 1FKBX
11 1FIYG
12 1FKAC
13 1FKB9
14 1FK72
15 1FK4H
16 1FK7S
17 1FK66
18 1FK6W
19 1FIXC
19 changes: 19 additions & 0 deletions q2_types/reference_db/tests/data/hmmer/invalid_idmaps/3.hmm.idmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
1 1FKAT
2 1FIZK
3 1FIY1
4 1FKA5
5 1FIYP
6 1FK7D
7 1FIX5
8 1FKCK
9 1FIXT
10 1FKBX
11 1FIYG
12 1FKAC
13 1FKB9
14 1FK72
15 1FK4H
16 1FK7S
17 1FK66
18 1FK6W
20 1FIXC
19 changes: 19 additions & 0 deletions q2_types/reference_db/tests/data/hmmer/invalid_idmaps/4.hmm.idmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
1FKAT
2 1FIZK
3 1FIY1
4 1FKA5
5 1FIYP
6 1FK7D
7 1FIX5
8 1FKCK
9 1FIXT
10 1FKBX
11 1FIYG
12 1FKAC
13 1FKB9
14 1FK72
15 1FK4H
16 1FK7S
17 1FK66
18 1FK6W
19 1FIXC
58 changes: 51 additions & 7 deletions q2_types/reference_db/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
DiamondDatabaseFileFmt, DiamondDatabaseDirFmt, EggnogRefBinFileFmt,
EggnogRefDirFmt, NCBITaxonomyNamesFormat, NCBITaxonomyNodesFormat,
NCBITaxonomyDirFmt, NCBITaxonomyBinaryFileFmt,
EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt, HmmerDirFmt
EggnogProteinSequencesDirFmt, EggnogRefTextFileFmt, HmmerDirFmt,
HmmerIdmapFileFmt
)
from qiime2.plugin import ValidationError

Expand Down Expand Up @@ -154,16 +155,59 @@ def test_EggnogRefTextFileFmt_invalid_taxid_lineage(self):
):
fmt_obj.validate()

def test_HmmerDirFmt(self):
fmt = HmmerDirFmt(self.get_data_path("hmmer"), 'r')
def test_HmmerDirFmt_valid(self):
    """The ``hmmer/bacteria`` test data validates as ``HmmerDirFmt``."""
    bacteria_dir = self.get_data_path("hmmer/bacteria")
    HmmerDirFmt(bacteria_dir, 'r').validate()

def test_HmmerDirFmt_invalid_idmap_1(self):
    """
    An idmap whose first code contains non-alphanumeric characters is
    rejected with the line-format error.
    """
    idmap_fp = self.get_data_path("hmmer/invalid_idmaps/1.hmm.idmap")
    idmap = HmmerIdmapFileFmt(idmap_fp, 'r')
    expected_msg = (
        "Expected index and an alphanumeric code separated "
        "by a single space."
    )
    with self.assertRaisesRegex(ValidationError, expected_msg):
        idmap.validate(level="min")

def test_HmmerDirFmt_invalid_idmap_2(self):
    """
    The ``2.hmm.idmap`` fixture is rejected with the line-format error.
    """
    idmap_fp = self.get_data_path("hmmer/invalid_idmaps/2.hmm.idmap")
    idmap = HmmerIdmapFileFmt(idmap_fp, 'r')
    expected_msg = (
        "Expected index and an alphanumeric code separated "
        "by a single space."
    )
    with self.assertRaisesRegex(ValidationError, expected_msg):
        idmap.validate(level="min")

def test_HmmerDirFmt_invalid_idmap_3(self):
    """
    An idmap whose index does not match its line number is rejected.

    Line 19 of the ``3.hmm.idmap`` fixture carries index 20. The pattern
    pins the full index-mismatch message: a bare 'Expected index' is
    also a prefix of the line-format message, so it could not tell
    which of the two checks actually fired.
    """
    fmt = HmmerIdmapFileFmt(self.get_data_path(
        "hmmer/invalid_idmaps/3.hmm.idmap"), 'r'
    )
    with self.assertRaisesRegex(
        ValidationError,
        "Expected index 19 but got 20 instead"
    ):
        fmt.validate(level="min")

def test_HmmerDirFmt_invalid_idmap_4(self):
    """
    An idmap whose first line lacks the leading index is rejected with
    the line-format error.
    """
    idmap_fp = self.get_data_path("hmmer/invalid_idmaps/4.hmm.idmap")
    idmap = HmmerIdmapFileFmt(idmap_fp, 'r')
    expected_msg = (
        "Expected index and an alphanumeric code separated "
        "by a single space."
    )
    with self.assertRaisesRegex(ValidationError, expected_msg):
        idmap.validate(level="min")

def test_HmmerDirFmt_missing_hmm(self):
with tempfile.TemporaryDirectory() as tmp:
shutil.copytree(
self.get_data_path("hmmer"), tmp, dirs_exist_ok=True
self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True
)
os.remove(f"{tmp}/bacteria/bacteria.hmm.h3f")
os.remove(f"{tmp}/bacteria.hmm.h3f")
fmt = HmmerDirFmt(tmp, 'r')
with self.assertRaisesRegex(
ValidationError, "Missing one or more files"
Expand All @@ -173,10 +217,10 @@ def test_HmmerDirFmt_missing_hmm(self):
def test_HmmerDirFmt_missing_fa(self):
with tempfile.TemporaryDirectory() as tmp:
shutil.copytree(
self.get_data_path("hmmer"), tmp, dirs_exist_ok=True
self.get_data_path("hmmer/bacteria"), tmp, dirs_exist_ok=True
)
for file in ["a", "b", "b2"]:
os.remove(f"{tmp}/bacteria/{file}.fa")
os.remove(f"{tmp}/{file}.fa")
fmt = HmmerDirFmt(tmp, 'r')
with self.assertRaisesRegex(
ValidationError, "Missing one or more files"
Expand Down

0 comments on commit 571d35f

Please sign in to comment.