From 18460e446e80c739d524a7a913d775224cb1a54c Mon Sep 17 00:00:00 2001 From: Liz Gehret <54517601+lizgehret@users.noreply.github.com> Date: Tue, 28 May 2024 11:15:15 -0700 Subject: [PATCH] TEST: updating transformer tests that call format validation (#335) --- q2_types/feature_data/_transformer.py | 4 ++ .../feature_data/tests/test_transformer.py | 51 +++++++++++-------- q2_types/metadata/tests/test_transformer.py | 7 +-- .../tests/test_transformer.py | 6 +-- 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/q2_types/feature_data/_transformer.py b/q2_types/feature_data/_transformer.py index 8680292c..9940b23b 100644 --- a/q2_types/feature_data/_transformer.py +++ b/q2_types/feature_data/_transformer.py @@ -285,6 +285,10 @@ def _fastaformats_to_series(ff, constructor=skbio.DNA, lowercase=False): for sequence in _read_from_fasta(str(ff), constructor, lowercase=lowercase): id_ = sequence.metadata['id'] + # this may no longer do anything b/c of format validation, but leaving + # here as a safeguard & we may want to examine/address later + # relevant PR associated with this change: + # https://github.com/qiime2/q2-types/pull/335 if id_ in data: raise ValueError("FASTA format sequence IDs must be unique. The " "following ID was found more than once: %s." diff --git a/q2_types/feature_data/tests/test_transformer.py b/q2_types/feature_data/tests/test_transformer.py index a46329cd..e64b6771 100644 --- a/q2_types/feature_data/tests/test_transformer.py +++ b/q2_types/feature_data/tests/test_transformer.py @@ -679,8 +679,10 @@ def test_series_to_dnafasta_format(self): def test_dnafasta_format_with_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'): - self.transform_format(DNAFASTAFormat, pd.Series, - 'dna-sequences-with-duplicate-ids.fasta') + transformer = self.get_transformer(DNAFASTAFormat, pd.Series) + input = self.get_data_path( + 'dna-sequences-with-duplicate-ids.fasta') + transformer(input) def test_dnafasta_format_to_metadata(self): _, obs = self.transform_format(DNAFASTAFormat, qiime2.Metadata, @@ -901,8 +903,10 @@ def test_series_to_rnafasta_format(self): def test_rnafasta_format_with_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*RNASEQUENCE1'): - self.transform_format(RNAFASTAFormat, pd.Series, - 'rna-sequences-with-duplicate-ids.fasta') + transformer = self.get_transformer(RNAFASTAFormat, pd.Series) + input = self.get_data_path( + 'rna-sequences-with-duplicate-ids.fasta') + transformer(input) def test_rnafasta_format_to_metadata(self): _, obs = self.transform_format(RNAFASTAFormat, qiime2.Metadata, @@ -1011,9 +1015,11 @@ def test_mixed_case_dna_fasta_format_to_series(self): def test_mixed_case_dna_fasta_format_with_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'): - self.transform_format( - MixedCaseDNAFASTAFormat, pd.Series, - 'dna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer = self.get_transformer( + MixedCaseDNAFASTAFormat, pd.Series) + input = self.get_data_path( + 'dna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer(input) def test_mixed_case_dna_fasta_format_to_metadata(self): _, obs = self.transform_format(MixedCaseDNAFASTAFormat, @@ -1077,9 +1083,11 @@ def test_mixed_case_rna_fasta_format_to_series(self): def test_mixed_case_rna_fasta_format_with_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'): - self.transform_format( - MixedCaseRNAFASTAFormat, pd.Series, - 'rna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer = self.get_transformer( + MixedCaseRNAFASTAFormat, pd.Series) + input = self.get_data_path( + 'rna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer(input) def test_mixed_case_rna_fasta_format_to_metadata(self): _, obs = self.transform_format(MixedCaseRNAFASTAFormat, @@ -1145,9 +1153,11 @@ def test_mixed_case_aln_dna_fasta_format_to_series(self): def test_mixed_case_aln_dna_fasta_format_w_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'): - self.transform_format( - MixedCaseAlignedDNAFASTAFormat, pd.Series, - 'dna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer = self.get_transformer( + MixedCaseAlignedDNAFASTAFormat, pd.Series) + input = self.get_data_path( + 'dna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer(input) def test_mixed_case_aln_dna_fasta_format_to_metadata(self): _, obs = self.transform_format( @@ -1215,9 +1225,11 @@ def test_mixed_case_aln_rna_fasta_format_to_series(self): def test_mixed_case_aln_rna_fasta_format_w_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*SEQUENCE1'): - self.transform_format( - MixedCaseAlignedRNAFASTAFormat, pd.Series, - 'rna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer = self.get_transformer( + MixedCaseAlignedRNAFASTAFormat, pd.Series) + input = self.get_data_path( + 'rna-sequences-mixed-case-with-duplicate-ids.fasta') + transformer(input) def test_mixed_case_aln_rna_fasta_format_to_metadata(self): _, obs = self.transform_format( @@ -1402,10 +1414,9 @@ def test_series_to_proteinfasta_format(self): def test_proteinfasta_format_with_duplicate_ids_to_series(self): with self.assertRaisesRegex(ValueError, 'unique.*sequence1'): - self.transform_format( - ProteinFASTAFormat, - pd.Series, - 'protein-sequences-duplicate-ids.fasta') + transformer = self.get_transformer(ProteinFASTAFormat, pd.Series) + input = self.get_data_path('protein-sequences-duplicate-ids.fasta') + transformer(input) def test_proteinfasta_format_to_metadata(self): _, obs = self.transform_format(ProteinFASTAFormat, qiime2.Metadata, diff --git a/q2_types/metadata/tests/test_transformer.py b/q2_types/metadata/tests/test_transformer.py index 64ca9b2b..f4083f43 100644 --- a/q2_types/metadata/tests/test_transformer.py +++ b/q2_types/metadata/tests/test_transformer.py @@ -24,11 +24,12 @@ def test_metadata_format_to_metadata(self): self.assertEqual(obs, exp_md) def test_non_metadata(self): - filename = 'invalid-metadata-1.tsv' with self.assertRaisesRegex(MetadataFileError, "column name 'bad-id-label'"): - self.transform_format(ImmutableMetadataFormat, qiime2.Metadata, - filename) + transformer = self.get_transformer( + ImmutableMetadataFormat, qiime2.Metadata) + input = self.get_data_path('invalid-metadata-1.tsv') + transformer(input) def test_metadata_to_metadata_format(self): filename = 'metadata.tsv' diff --git a/q2_types/per_sample_sequences/tests/test_transformer.py b/q2_types/per_sample_sequences/tests/test_transformer.py index bd57da41..ca7a3b14 100644 --- a/q2_types/per_sample_sequences/tests/test_transformer.py +++ b/q2_types/per_sample_sequences/tests/test_transformer.py @@ -144,7 +144,7 @@ def test_casava_one_eight_laneless_per_sample_dirfmt_to_slpspefd(self): input, dirfmt = self.transform_format( CasavaOneEightLanelessPerSampleDirFmt, - SingleLanePerSamplePairedEndFastqDirFmt, filenames=filenames + SingleLanePerSampleSingleEndFastqDirFmt, filenames=filenames ) expected_filepaths = ['Human-Kneecap_S1_L001_R1_001.fastq.gz', 'Human-Armpit_S2_L001_R1_001.fastq.gz'] @@ -179,7 +179,7 @@ def test_casava_one_eight_single_lane_per_sample_dirfmt_to_slpspefdf(self): filenames = ('Human-Kneecap_S1_L001_R1_001.fastq.gz',) input, obs = self.transform_format( CasavaOneEightSingleLanePerSampleDirFmt, - SingleLanePerSamplePairedEndFastqDirFmt, filenames=filenames + SingleLanePerSampleSingleEndFastqDirFmt, filenames=filenames ) input = skbio.io.read( @@ -216,7 +216,7 @@ def test_miseq_demux_dirfmt_to_slpssefdf(self): def test_miseq_demux_dirfmt_to_slpspefdf(self): input, obs = self.transform_format( CasavaOneEightLanelessPerSampleDirFmt, - SingleLanePerSamplePairedEndFastqDirFmt, + SingleLanePerSampleSingleEndFastqDirFmt, filenames=('Human-Kneecap_S1_R1_001.fastq.gz',), )