Skip to content

Commit

Permalink
Merge branch 'qiime2:dev' into st_fetch_busco_iss_74
Browse files (browse the repository at this point in the history)
  • Loading branch information
Sann5 authored May 10, 2024
2 parents 46df929 + 940a02d commit 06d0504
Show file tree
Hide file tree
Showing 32 changed files with 106 additions and 205 deletions.
2 changes: 1 addition & 1 deletion q2_types/feature_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def _fastaformats_to_metadata(ff, constructor=skbio.DNA, lowercase=False):

def _series_to_fasta_format(ff, data, sequence_type="DNA", lowercase=False):
with ff.open() as f:
for id_, seq in data.iteritems():
for id_, seq in data.items():
if sequence_type == "protein":
sequence = skbio.Protein(seq, metadata={'id': id_},
lowercase=lowercase)
Expand Down
6 changes: 3 additions & 3 deletions q2_types/feature_data_mag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
'OrthologAnnotationDirFmt', 'OrthologFileFmt', 'Contig'
]

importlib.import_module('q2_types.feature_data._format')
importlib.import_module('q2_types.feature_data._transformer')
importlib.import_module('q2_types.feature_data._type')
importlib.import_module('q2_types.feature_data_mag._format')
importlib.import_module('q2_types.feature_data_mag._transformer')
importlib.import_module('q2_types.feature_data_mag._type')
2 changes: 1 addition & 1 deletion q2_types/feature_data_mag/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _get_filename(full_path):
def _series_to_fasta(series, ff, seq_type='DNA'):
fp = os.path.join(str(ff), f'{series.name}.fasta')
with open(fp, 'w') as fh:
for id_, seq in series.iteritems():
for id_, seq in series.items():
if seq:
sequence = CONSTRUCTORS[seq_type](seq, metadata={'id': id_})
skbio.io.write(sequence, format='fasta', into=fh)
Expand Down
8 changes: 7 additions & 1 deletion q2_types/feature_data_mag/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
)
from qiime2.core.type import SemanticType

from ..per_sample_sequences import ContigSequencesDirFmt
from ..bowtie2 import Bowtie2IndexDirFmt
from ..per_sample_sequences import ContigSequencesDirFmt, SingleBowtie2Index
from ..plugin_setup import plugin


Expand Down Expand Up @@ -55,3 +56,8 @@
plugin.register_artifact_class(
FeatureData[KEGG],
directory_format=OrthologAnnotationDirFmt)

plugin.register_semantic_type_to_format(
FeatureData[SingleBowtie2Index],
artifact_format=Bowtie2IndexDirFmt
)
11 changes: 10 additions & 1 deletion q2_types/feature_data_mag/tests/test_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@

import unittest

from q2_types.bowtie2 import Bowtie2IndexDirFmt
from q2_types.feature_data import FeatureData
from qiime2.plugin.testing import TestPluginBase

from q2_types.feature_data_mag import (
MAG, MAGSequencesDirFmt, OrthologAnnotationDirFmt,
NOG, OG, KEGG, Contig
)
from q2_types.per_sample_sequences import ContigSequencesDirFmt
from q2_types.per_sample_sequences import (
ContigSequencesDirFmt, SingleBowtie2Index
)


class TestTypes(TestPluginBase):
Expand Down Expand Up @@ -63,6 +66,12 @@ def test_kegg_registered_to_format(self):
FeatureData[KEGG],
OrthologAnnotationDirFmt)

def test_bowtie_index_semantic_type_to_format_registration(self):
self.assertSemanticTypeRegisteredToFormat(
FeatureData[SingleBowtie2Index],
Bowtie2IndexDirFmt
)


if __name__ == '__main__':
unittest.main()
12 changes: 6 additions & 6 deletions q2_types/genome_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ def _validate_(self, level):


class GenesDirectoryFormat(model.DirectoryFormat):
genes = model.FileCollection(r'(.*\_)?genes[0-9]*\.(fa|fna|fasta)$',
genes = model.FileCollection(r'.+\.(fa|fna|fasta)$',
format=DNAFASTAFormat)

@genes.set_path_maker
def genes_path_maker(self, genome_id):
return '%s_genes.fasta' % genome_id
return '%s.fasta' % genome_id


class ProteinsDirectoryFormat(model.DirectoryFormat):
proteins = model.FileCollection(r'(.*\_)?proteins[0-9]*\.(fa|faa|fasta)$',
proteins = model.FileCollection(r'.+\.(fa|faa|fasta)$',
format=ProteinFASTAFormat)

@proteins.set_path_maker
def proteins_path_maker(self, genome_id):
return '%s_proteins.fasta' % genome_id
return '%s.fasta' % genome_id


class GFF3Format(model.TextFileFormat):
Expand Down Expand Up @@ -160,12 +160,12 @@ def _validate_(self, level):


class LociDirectoryFormat(model.DirectoryFormat):
loci = model.FileCollection(r'(.*\_)?loci[0-9]*\.gff$',
loci = model.FileCollection(r'.+\.gff$',
format=GFF3Format)

@loci.set_path_maker
def loci_path_maker(self, genome_id):
return '%s_loci.gff' % genome_id
return '%s.gff' % genome_id


plugin.register_formats(
Expand Down
2 changes: 1 addition & 1 deletion q2_types/genome_data/_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _8(ortholog_file: OrthologFileFmt) -> pd.DataFrame:
def _series_to_fasta(series, ff, seq_type='DNA'):
fp = os.path.join(ff.path, f'{series.name}.fasta')
with open(fp, 'w') as fh:
for id_, seq in series.iteritems():
for id_, seq in series.items():
if seq:
sequence = CONSTRUCTORS[seq_type](seq, metadata={'id': id_})
skbio.io.write(sequence, format='fasta', into=fh)
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

56 changes: 4 additions & 52 deletions q2_types/genome_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,77 +54,29 @@ def test_seed_ortholog_dir_fmt_collection(self):
obj.validate()

def test_genes_dirfmt_fa_with_suffix(self):
dirpath = self.get_data_path('genes-with-suffix')
dirpath = self.get_data_path('genes')
fmt = GenesDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_genes_dirfmt_fa_with_prefix(self):
dirpath = self.get_data_path('genes-with-prefix')
fmt = GenesDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_genes_dirfmt_fa_with_wrong_prefix(self):
dirpath = self.get_data_path('genes-with-wrong-prefix')
fmt = GenesDirectoryFormat(dirpath, mode='r')

with self.assertRaisesRegex(
ValidationError,
'Missing one or more files for GenesDirectoryFormat'
):
fmt.validate()

def test_proteins_dirfmt_fa_with_suffix(self):
dirpath = self.get_data_path('proteins-with-suffix')
fmt = ProteinsDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_proteins_dirfmt_fa_with_prefix(self):
dirpath = self.get_data_path('proteins-with-prefix')
dirpath = self.get_data_path('proteins')
fmt = ProteinsDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_proteins_dirfmt_fa_with_wrong_prefix(self):
dirpath = self.get_data_path('proteins-with-wrong-prefix')
fmt = ProteinsDirectoryFormat(dirpath, mode='r')

with self.assertRaisesRegex(
ValidationError,
'Missing one or more files for ProteinsDirectoryFormat'
):
fmt.validate()

def test_gff_format_positive_with_suffix(self):
filepath = self.get_data_path('loci-with-suffix/loci1.gff')
filepath = self.get_data_path('loci/loci1.gff')
fmt = GFF3Format(filepath, mode='r')

fmt.validate()

def test_loci_dirfmt_with_suffix(self):
dirpath = self.get_data_path('loci-with-suffix')
fmt = LociDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_loci_dirfmt_with_prefix(self):
dirpath = self.get_data_path('loci-with-prefix')
dirpath = self.get_data_path('loci')
fmt = LociDirectoryFormat(dirpath, mode='r')

fmt.validate()

def test_loci_dirfmt_with_wrong_prefix(self):
dirpath = self.get_data_path('loci-with-wrong-prefix')
fmt = LociDirectoryFormat(dirpath, mode='r')

with self.assertRaisesRegex(
ValidationError,
'Missing one or more files for LociDirectoryFormat'
):
fmt.validate()

def test_gff_format_wrong_version(self):
filepath = self.get_data_path('loci-invalid/loci-wrong-version.gff')
with self.assertRaisesRegex(
Expand Down
12 changes: 6 additions & 6 deletions q2_types/genome_data/tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def seqs_to_df(seqs):
def test_genes_to_dataframe(self):
_, obs = self.transform_format(GenesDirectoryFormat, pd.DataFrame,
filenames=[
'genes-with-suffix/genes1.fa',
'genes-with-suffix/genes2.fa'
'genes/genes1.fa',
'genes/genes2.fa'
])
exp = self.seqs_to_df(self.genes)
pd.testing.assert_frame_equal(exp, obs)
Expand All @@ -82,8 +82,8 @@ def test_proteins_to_dataframe(self):
ProteinsDirectoryFormat,
pd.DataFrame,
filenames=[
'proteins-with-suffix/proteins1.faa',
'proteins-with-suffix/proteins2.faa'
'proteins/proteins1.faa',
'proteins/proteins2.faa'
])
exp = self.seqs_to_df(self.proteins)
pd.testing.assert_frame_equal(exp, obs)
Expand All @@ -100,7 +100,7 @@ def test_gff_to_interval_metadata_iterator(self):
input, obs = self.transform_format(
GFF3Format,
IntervalMetadataIterator,
filename='loci-with-suffix/loci1.gff')
filename='loci/loci1.gff')
exp = skbio.io.read(str(input), format='gff3')

for o, e in zip(obs, exp):
Expand All @@ -109,7 +109,7 @@ def test_gff_to_interval_metadata_iterator(self):
def test_interval_metadata_iterator_to_gff(self):
transformer = self.get_transformer(IntervalMetadataIterator,
GFF3Format)
filepath = self.get_data_path('loci-with-suffix/loci1.gff')
filepath = self.get_data_path('loci/loci1.gff')
generator = skbio.io.read(filepath, format='gff3')
input = IntervalMetadataIterator(generator)

Expand Down
14 changes: 11 additions & 3 deletions q2_types/per_sample_sequences/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# ----------------------------------------------------------------------------

from q2_types.bowtie2 import Bowtie2IndexDirFmt
from q2_types.feature_data import BLAST6
from q2_types.feature_data import BLAST6, FeatureData
from qiime2.plugin import SemanticType

from ..genome_data import SeedOrthologDirFmt
Expand All @@ -31,11 +31,15 @@
Contigs = SemanticType(
'Contigs', variant_of=SampleData.field['type'])
SingleBowtie2Index = SemanticType(
'SingleBowtie2Index', variant_of=SampleData.field['type'])
'SingleBowtie2Index',
variant_of=[SampleData.field['type'], FeatureData.field['type']]
)
MultiBowtie2Index = SemanticType(
'MultiBowtie2Index', variant_of=SampleData.field['type'])
AlignmentMap = SemanticType(
'AlignmentMap', variant_of=SampleData.field['type'])
'AlignmentMap',
variant_of=[SampleData.field['type'], FeatureData.field['type']]
)
MultiAlignmentMap = SemanticType(
'MultiAlignmentMap', variant_of=SampleData.field['type'])

Expand Down Expand Up @@ -90,6 +94,10 @@
SampleData[AlignmentMap],
artifact_format=BAMDirFmt
)
plugin.register_semantic_type_to_format(
FeatureData[AlignmentMap],
artifact_format=BAMDirFmt
)
plugin.register_semantic_type_to_format(
SampleData[MultiAlignmentMap],
artifact_format=MultiBAMDirFmt
Expand Down
Loading

0 comments on commit 06d0504

Please sign in to comment.