Skip to content

Commit

Permalink
MAINT: adjust regex for genome data semantic types (#322)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Sann5 authored May 2, 2024
1 parent 5713965 commit 2883160
Show file tree
Hide file tree
Showing 22 changed files with 22 additions and 155 deletions.
12 changes: 6 additions & 6 deletions q2_types/genome_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ def _validate_(self, level):


class GenesDirectoryFormat(model.DirectoryFormat):
- genes = model.FileCollection(r'(.*\_)?genes[0-9]*\.(fa|fna|fasta)$',
+ genes = model.FileCollection(r'.+\.(fa|fna|fasta)$',
format=DNAFASTAFormat)

@genes.set_path_maker
def genes_path_maker(self, genome_id):
- return '%s_genes.fasta' % genome_id
+ return '%s.fasta' % genome_id


class ProteinsDirectoryFormat(model.DirectoryFormat):
- proteins = model.FileCollection(r'(.*\_)?proteins[0-9]*\.(fa|faa|fasta)$',
+ proteins = model.FileCollection(r'.+\.(fa|faa|fasta)$',
format=ProteinFASTAFormat)

@proteins.set_path_maker
def proteins_path_maker(self, genome_id):
- return '%s_proteins.fasta' % genome_id
+ return '%s.fasta' % genome_id


class GFF3Format(model.TextFileFormat):
Expand Down Expand Up @@ -160,12 +160,12 @@ def _validate_(self, level):


class LociDirectoryFormat(model.DirectoryFormat):
- loci = model.FileCollection(r'(.*\_)?loci[0-9]*\.gff$',
+ loci = model.FileCollection(r'.+\.gff$',
format=GFF3Format)

@loci.set_path_maker
def loci_path_maker(self, genome_id):
- return '%s_loci.gff' % genome_id
+ return '%s.gff' % genome_id


plugin.register_formats(
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

56 changes: 4 additions & 52 deletions q2_types/genome_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,77 +54,29 @@ def test_seed_ortholog_dir_fmt_collection(self):
obj.validate()

def test_genes_dirfmt_fa_with_suffix(self):
dirpath = self.get_data_path('genes-with-suffix')
dirpath = self.get_data_path('genes')
fmt = GenesDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_genes_dirfmt_fa_with_prefix(self):
dirpath = self.get_data_path('genes-with-prefix')
fmt = GenesDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_genes_dirfmt_fa_with_wrong_prefix(self):
dirpath = self.get_data_path('genes-with-wrong-prefix')
fmt = GenesDirectoryFormat(dirpath, mode='r')

with self.assertRaisesRegex(
ValidationError,
'Missing one or more files for GenesDirectoryFormat'
):
fmt.validate()

def test_proteins_dirfmt_fa_with_suffix(self):
dirpath = self.get_data_path('proteins-with-suffix')
fmt = ProteinsDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_proteins_dirfmt_fa_with_prefix(self):
dirpath = self.get_data_path('proteins-with-prefix')
dirpath = self.get_data_path('proteins')
fmt = ProteinsDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_proteins_dirfmt_fa_with_wrong_prefix(self):
dirpath = self.get_data_path('proteins-with-wrong-prefix')
fmt = ProteinsDirectoryFormat(dirpath, mode='r')

with self.assertRaisesRegex(
ValidationError,
'Missing one or more files for ProteinsDirectoryFormat'
):
fmt.validate()

def test_gff_format_positive_with_suffix(self):
filepath = self.get_data_path('loci-with-suffix/loci1.gff')
filepath = self.get_data_path('loci/loci1.gff')
fmt = GFF3Format(filepath, mode='r')

fmt.validate()

def test_loci_dirfmt_with_suffix(self):
dirpath = self.get_data_path('loci-with-suffix')
fmt = LociDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_loci_dirfmt_with_prefix(self):
dirpath = self.get_data_path('loci-with-prefix')
dirpath = self.get_data_path('loci')
fmt = LociDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_loci_dirfmt_with_wrong_prefix(self):
dirpath = self.get_data_path('loci-with-wrong-prefix')
fmt = LociDirectoryFormat(dirpath, mode='r')

with self.assertRaisesRegex(
ValidationError,
'Missing one or more files for LociDirectoryFormat'
):
fmt.validate()

def test_gff_format_wrong_version(self):
filepath = self.get_data_path('loci-invalid/loci-wrong-version.gff')
with self.assertRaisesRegex(
Expand Down
12 changes: 6 additions & 6 deletions q2_types/genome_data/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def seqs_to_df(seqs):
def test_genes_to_dataframe(self):
_, obs = self.transform_format(GenesDirectoryFormat, pd.DataFrame,
filenames=[
- 'genes-with-suffix/genes1.fa',
- 'genes-with-suffix/genes2.fa'
+ 'genes/genes1.fa',
+ 'genes/genes2.fa'
])
exp = self.seqs_to_df(self.genes)
pd.testing.assert_frame_equal(exp, obs)
Expand All @@ -82,8 +82,8 @@ def test_proteins_to_dataframe(self):
ProteinsDirectoryFormat,
pd.DataFrame,
filenames=[
- 'proteins-with-suffix/proteins1.faa',
- 'proteins-with-suffix/proteins2.faa'
+ 'proteins/proteins1.faa',
+ 'proteins/proteins2.faa'
])
exp = self.seqs_to_df(self.proteins)
pd.testing.assert_frame_equal(exp, obs)
Expand All @@ -100,7 +100,7 @@ def test_gff_to_interval_metadata_iterator(self):
input, obs = self.transform_format(
GFF3Format,
IntervalMetadataIterator,
- filename='loci-with-suffix/loci1.gff')
+ filename='loci/loci1.gff')
exp = skbio.io.read(str(input), format='gff3')

for o, e in zip(obs, exp):
Expand All @@ -109,7 +109,7 @@ def test_gff_to_interval_metadata_iterator(self):
def test_interval_metadata_iterator_to_gff(self):
transformer = self.get_transformer(IntervalMetadataIterator,
GFF3Format)
- filepath = self.get_data_path('loci-with-suffix/loci1.gff')
+ filepath = self.get_data_path('loci/loci1.gff')
generator = skbio.io.read(filepath, format='gff3')
input = IntervalMetadataIterator(generator)

Expand Down
13 changes: 6 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,12 @@
['data/*', 'data/*/*',
'data/mags-fa/*', 'data/mags-fasta/*'],
'q2_types.genome_data.tests':
- ['data/*/', 'data/genes-with-prefix/*',
- 'data/genes-with-suffix/*', 'data/genes-with-wrong-prefix/*',
- 'data/loci-invalid/*', 'data/loci-with-prefix/*',
- 'data/loci-with-suffix/*', 'data/loci-with-wrong-prefix/*',
- 'data/ortholog/*', 'data/proteins-with-suffix/*',
- 'data/proteins-with-prefix/*',
- 'data/proteins-with-wrong-prefix/*',
+ ['data/*/',
+ 'data/genes/*',
+ 'data/loci-invalid/*',
+ 'data/loci/*',
+ 'data/ortholog/*',
+ 'data/proteins/*',
],
'q2_types.kraken2.tests': [
'data/*',
Expand Down

0 comments on commit 2883160

Please sign in to comment.