diff --git a/q2_amr/amrfinderplus/types/__init__.py b/q2_amr/amrfinderplus/types/__init__.py index c84d73b..1e2e7e1 100644 --- a/q2_amr/amrfinderplus/types/__init__.py +++ b/q2_amr/amrfinderplus/types/__init__.py @@ -6,9 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, - AMRFinderPlusAnnotationFormat, - AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, BinaryFormat, TextFormat, @@ -16,9 +13,6 @@ __all__ = [ "AMRFinderPlusDatabaseDirFmt", - "AMRFinderPlusAnnotationFormat", - "AMRFinderPlusAnnotationsDirFmt", - "AMRFinderPlusAnnotationDirFmt", "TextFormat", "BinaryFormat", ] diff --git a/q2_amr/amrfinderplus/types/_format.py b/q2_amr/amrfinderplus/types/_format.py index ba03052..25f1564 100644 --- a/q2_amr/amrfinderplus/types/_format.py +++ b/q2_amr/amrfinderplus/types/_format.py @@ -5,10 +5,7 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import pandas as pd from q2_types.feature_data import MixedCaseDNAFASTAFormat, ProteinFASTAFormat -from q2_types.per_sample_sequences._format import MultiDirValidationMixin -from qiime2.core.exceptions import ValidationError from qiime2.plugin import model @@ -60,67 +57,3 @@ def amr_dna_comp_path_maker(self, species, extension): @amr_dna_tab.set_path_maker def amr_dna_tab_path_maker(self, species): return "AMR_DNA-%s.tab" % species - - -class AMRFinderPlusAnnotationFormat(model.TextFileFormat): - def _validate(self): - header_coordinates = [ - "Protein identifier", - "Contig id", - "Start", - "Stop", - "Strand", - "Gene symbol", - "Sequence name", - "Scope", - "Element type", - "Element subtype", - "Class", - "Subclass", - "Method", - "Target length", - "Reference sequence length", - "% Coverage of reference sequence", - "% Identity to reference sequence", - "Alignment length", - "Accession of closest sequence", - "Name of closest sequence", - "HMM id", - "HMM description", - "Hierarchy node", - ] - header = header_coordinates[:1] + header_coordinates[5:] - try: - header_obs = pd.read_csv(str(self), sep="\t", nrows=0).columns.tolist() - if header != header_obs and header_coordinates != header_obs: - raise ValidationError( - "Header line does not match AMRFinderPlusAnnotationFormat. Must " - "consist of the following values: " - + ", ".join(header_coordinates) - + ".\n\nWhile Contig id, Start, Stop and Strand are optional." - + "\n\nFound instead: " - + ", ".join(header_obs) - ) - except pd.errors.EmptyDataError: - pass - - def _validate_(self, level): - self._validate() - - -class AMRFinderPlusAnnotationsDirFmt(MultiDirValidationMixin, model.DirectoryFormat): - annotation = model.FileCollection( - r".*amr_(annotations|mutations)\.tsv$", format=AMRFinderPlusAnnotationFormat - ) - - @annotation.set_path_maker - def annotation_path_maker(self, sample_id, mag_id): - prefix = f"{sample_id}/{mag_id}_" if mag_id else f"{sample_id}/" - return f"{prefix}amr_annotations.tsv" - - -AMRFinderPlusAnnotationDirFmt = model.SingleFileDirectoryFormat( - "AMRFinderPlusAnnotationDirFmt", - r"amr_(annotations|mutations)\.tsv$", - AMRFinderPlusAnnotationFormat, -) diff --git a/q2_amr/amrfinderplus/types/_type.py b/q2_amr/amrfinderplus/types/_type.py index 13d0e90..680bfcf 100644 --- a/q2_amr/amrfinderplus/types/_type.py +++ b/q2_amr/amrfinderplus/types/_type.py @@ -5,14 +5,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from q2_types.feature_data import FeatureData -from q2_types.sample_data import SampleData from qiime2.core.type import SemanticType AMRFinderPlusDatabase = SemanticType("AMRFinderPlusDatabase") -AMRFinderPlusAnnotations = SemanticType( - "AMRFinderPlusAnnotations", variant_of=SampleData.field["type"] -) -AMRFinderPlusAnnotation = SemanticType( - "AMRFinderPlusAnnotation", variant_of=FeatureData.field["type"] -) diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv deleted file mode 100644 index 20e52d1..0000000 --- a/q2_amr/amrfinderplus/types/tests/data/annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d_amr_annotations.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node -aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib -blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv deleted file mode 100644 index 20e52d1..0000000 --- a/q2_amr/amrfinderplus/types/tests/data/annotation/no_coordinates/aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv +++ /dev/null @@ -1,3 +0,0 @@ -Protein identifier Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description Hierarchy node -aph3pp-Ib_partial_5p_neg aph(3'')-Ib aminoglycoside O-phosphotransferase APH(3'')-Ib core AMR AMR AMINOGLYCOSIDE STREPTOMYCIN PARTIALP 225 267 81.27 100.00 217 WP_001082319.1 aminoglycoside O-phosphotransferase APH(3'')-Ib NF032896.1 APH(3'') family aminoglycoside O-phosphotransferase aph(3'')-Ib -blaOXA-436_partial blaOXA OXA-48 family class D beta-lactamase core AMR AMR BETA-LACTAM BETA-LACTAM PARTIALP 233 265 87.92 100.00 233 WP_058842180.1 OXA-48 family carbapenem-hydrolyzing class D beta-lactamase OXA-436 NF012161.0 class D beta-lactamase blaOXA-48_fam diff --git a/q2_amr/amrfinderplus/types/tests/data/annotation_wrong/amr_annotation.tsv b/q2_amr/amrfinderplus/types/tests/data/annotation_wrong/amr_annotation.tsv deleted file mode 100644 index 1f1fa8b..0000000 --- a/q2_amr/amrfinderplus/types/tests/data/annotation_wrong/amr_annotation.tsv +++ /dev/null @@ -1 +0,0 @@ -Incorrect Header 1 Incorrect Header 2 Incorrect Header 3 diff --git a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py index 2b2ea6f..f413052 100644 --- a/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py +++ b/q2_amr/amrfinderplus/types/tests/test_types_formats_transformers.py @@ -5,101 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os -import tempfile - -from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase -from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, - AMRFinderPlusAnnotationFormat, - AMRFinderPlusAnnotationsDirFmt, - AMRFinderPlusDatabaseDirFmt, -) +from q2_amr.amrfinderplus.types._format import AMRFinderPlusDatabaseDirFmt -class TestAMRFinderPlusTypesAndFormats(TestPluginBase): +class TestAMRFinderPlusDatabaseTypesAndFormats(TestPluginBase): package = "q2_amr.amrfinderplus.types.tests" def test_amrfinderplus_database_directory_format_validate_positive(self): format = AMRFinderPlusDatabaseDirFmt(self.get_data_path("database"), mode="r") format.validate() - - def test_amrfinderplus_annotation_format_validate_positive(self): - filepath = self.get_data_path( - "annotation/no_coordinates/" - "aa447c99-ecd9-4c4a-a53b-4df6999815dd_amr_annotations.tsv" - ) - - format = AMRFinderPlusAnnotationFormat(filepath, mode="r") - format.validate() - - def test_amrfinderplus_annotation_format_validate_positive_coordinates(self): - filepath = self.get_data_path( - "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" - "_amr_annotations.tsv" - ) - format = AMRFinderPlusAnnotationFormat(filepath, mode="r") - format.validate() - - def test_amrfinderplus_annotation_format_validate_positive_empty(self): - with tempfile.TemporaryDirectory() as temp_dir: - temp_file_path = os.path.join(temp_dir, "amr_annotations.tsv") - with open(temp_file_path, "w"): - pass - format = AMRFinderPlusAnnotationFormat(temp_file_path, mode="r") - format.validate() - - def test_amrfinderplus_annotation_format_validation_error(self): - with self.assertRaises(ValidationError) as context: - path = self.get_data_path("annotation_wrong/amr_annotation.tsv") - format = AMRFinderPlusAnnotationFormat(path, mode="r") - format.validate() - - header_coordinates = [ - "Protein identifier", - "Contig id", - "Start", - "Stop", - "Strand", - "Gene symbol", - "Sequence name", - "Scope", - "Element type", - "Element subtype", - "Class", - "Subclass", - "Method", - "Target length", - "Reference sequence length", - "% Coverage of reference sequence", - "% Identity to reference sequence", - "Alignment length", - "Accession of closest sequence", - "Name of closest sequence", - "HMM id", - "HMM description", - ] - expected_message = ( - "Header line does not match AMRFinderPlusAnnotation format. Must " - "consist of the following values: " - + ", ".join(header_coordinates) - + ".\nWhile Contig id, Start, Stop and Strand are optional." - + "\n\nFound instead: " - + "Incorrect Header 1, Incorrect Header 2, Incorrect Header 3" - ) - - self.assertEqual(str(context.exception), expected_message) - - def test_amrfinderplus_annotation_directory_format(self): - dirpath = self.get_data_path( - "annotation/coordinates/e026af61-d911-4de3-a957-7e8bf837f30d" - ) - annotations = AMRFinderPlusAnnotationDirFmt(dirpath, mode="r") - assert isinstance(annotations, AMRFinderPlusAnnotationDirFmt) - - def test_amrfinderplus_annotations_directory_format(self): - dirpath = self.get_data_path("annotation") - annotations = AMRFinderPlusAnnotationsDirFmt(dirpath, mode="r") - assert isinstance(annotations, AMRFinderPlusAnnotationsDirFmt) diff --git a/q2_amr/plugin_setup.py b/q2_amr/plugin_setup.py index bd21a68..828622b 100644 --- a/q2_amr/plugin_setup.py +++ b/q2_amr/plugin_setup.py @@ -7,7 +7,6 @@ # ---------------------------------------------------------------------------- import importlib -from q2_types.feature_data import FeatureData from q2_types.feature_table import FeatureTable, Frequency from q2_types.per_sample_sequences import ( MAGs, @@ -30,18 +29,11 @@ from q2_amr import __version__ from q2_amr.amrfinderplus.types._format import ( - AMRFinderPlusAnnotationDirFmt, - AMRFinderPlusAnnotationFormat, - AMRFinderPlusAnnotationsDirFmt, AMRFinderPlusDatabaseDirFmt, BinaryFormat, TextFormat, ) -from q2_amr.amrfinderplus.types._type import ( - AMRFinderPlusAnnotation, - AMRFinderPlusAnnotations, - AMRFinderPlusDatabase, -) +from q2_amr.amrfinderplus.types._type import AMRFinderPlusDatabase from q2_amr.card.database import fetch_card_db from q2_amr.card.heatmap import heatmap from q2_amr.card.kmer import ( @@ -1092,8 +1084,6 @@ CARDReadsAlleleKmerAnalysis, CARDMAGsKmerAnalysis, AMRFinderPlusDatabase, - AMRFinderPlusAnnotations, - AMRFinderPlusAnnotation, ) plugin.register_semantic_type_to_format( @@ -1128,15 +1118,6 @@ AMRFinderPlusDatabase, artifact_format=AMRFinderPlusDatabaseDirFmt, ) - -plugin.register_semantic_type_to_format( - SampleData[AMRFinderPlusAnnotations], - artifact_format=AMRFinderPlusAnnotationsDirFmt, -) -plugin.register_semantic_type_to_format( - FeatureData[AMRFinderPlusAnnotation], - artifact_format=AMRFinderPlusAnnotationDirFmt, -) plugin.register_formats( CARDKmerDatabaseDirectoryFormat, CARDKmerJSONFormat, @@ -1164,9 +1145,6 @@ AMRFinderPlusDatabaseDirFmt, TextFormat, BinaryFormat, - AMRFinderPlusAnnotationFormat, - AMRFinderPlusAnnotationsDirFmt, - AMRFinderPlusAnnotationDirFmt, ) importlib.import_module("q2_amr.card.types._transformer")