-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathtest_cutter.py
153 lines (117 loc) · 5.67 KB
/
test_cutter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import skbio
from qiime2.sdk import Artifact
from qiime2.plugins.feature_classifier.actions import extract_reads
from q2_types.feature_data import DNAFASTAFormat
from . import FeatureClassifierTestPluginBase
class CutterTests(FeatureClassifierTestPluginBase):
    """Tests for the ``extract_reads`` action.

    Every test calls ``extract_reads`` on a FASTA fixture with a forward
    and reverse primer, then either compares the extracted reads against
    ``self.amplicons`` or checks that the expected exception is raised.
    """

    package = 'q2_feature_classifier.tests'

    def setUp(self):
        """Import the shared fixtures and define primers and amplicons."""
        super().setUp()
        self.sequences = self._import_sequences('dna-sequences.fasta')
        self.mixed_sequences = self._import_sequences(
            'dna-sequences-mixed.fasta')
        self.f_primer = 'AGAGA'
        self.r_primer = 'GCTGC'
        # Expected reads, in output order; every amplicon is 4 characters
        # long, which the length/trim failure tests below rely on.
        self.amplicons = ['ACGT', 'AAGT', 'ACCT', 'ACGG', 'ACTT']

    def _import_sequences(self, filename):
        """Import a test data file as a ``FeatureData[Sequence]`` artifact.

        Parameters
        ----------
        filename : str
            Name of the fixture, resolved through ``self.get_data_path``.

        Returns
        -------
        Artifact
            The artifact returned by ``Artifact.import_data``.
        """
        return Artifact.import_data(
            'FeatureData[Sequence]', self.get_data_path(filename))

    def _test_results(self, results, expected=None):
        """Assert that the reads in ``results`` match ``expected`` in order.

        Parameters
        ----------
        results
            Return value of ``extract_reads``; its ``reads`` artifact is
            viewed as ``DNAFASTAFormat`` and parsed with ``skbio.io.read``.
        expected : sequence of str, optional
            Expected read strings, indexed by output position. Defaults to
            ``self.amplicons``.
        """
        if expected is None:
            expected = self.amplicons
        fasta_path = str(results.reads.view(DNAFASTAFormat))
        # NOTE(review): only the reads actually present are compared, so a
        # result holding fewer reads than ``expected`` still passes (extra
        # reads fail with IndexError). Consider asserting the read count
        # once the fixture sizes have been confirmed.
        for i, read in enumerate(skbio.io.read(fasta_path, format='fasta')):
            self.assertEqual(str(read), expected[i])

    def test_extract_reads_expected(self):
        """Default orientation yields the expected amplicons."""
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4)
        self._test_results(results)

    def test_extract_reads_expected_forward(self):
        """``read_orientation='forward'`` yields the expected amplicons."""
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, read_orientation='forward')
        self._test_results(results)

    def test_extract_mixed(self):
        """The mixed fixture yields the expected amplicons."""
        results = extract_reads(
            self.mixed_sequences, f_primer=self.f_primer,
            r_primer=self.r_primer, min_length=4)
        self._test_results(results)

    def test_extract_reads_expected_reverse(self):
        """``read_orientation='reverse'`` works on the reverse fixture."""
        reverse_sequences = self._import_sequences(
            'dna-sequences-reverse.fasta')
        results = extract_reads(
            reverse_sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, read_orientation='reverse')
        self._test_results(results)

    def test_extract_reads_manual_batch_size(self):
        """An explicit ``batch_size`` yields the expected amplicons."""
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, batch_size=10)
        self._test_results(results)

    def test_extract_reads_two_jobs(self):
        """``n_jobs=2`` yields the expected amplicons."""
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=4, n_jobs=2)
        self._test_results(results)

    def test_extract_reads_expected_degenerate_primers(self):
        """Degenerate primers yield the expected amplicons."""
        degenerate_f_primer = 'WWWWW'
        degenerate_r_primer = 'SSSSS'
        degenerate_sequences = self._import_sequences(
            'dna-sequences-degenerate-primers.fasta')
        results = extract_reads(
            degenerate_sequences, f_primer=degenerate_f_primer,
            r_primer=degenerate_r_primer, min_length=4)
        self._test_results(results)

    def test_extract_reads_expected_trim_right(self):
        """Tests expected behavior of trim_right option"""
        results = extract_reads(
            self.sequences, f_primer=self.f_primer, r_primer=self.r_primer,
            min_length=3, trim_right=1)
        # Each expected read is the amplicon minus its last character.
        self._test_results(
            results, expected=[amplicon[:-1] for amplicon in self.amplicons])

    def test_extract_reads_fail_identity(self):
        """``identity=1`` raises ``RuntimeError`` ("No matches found")."""
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, min_length=4, identity=1)

    def test_extract_reads_fail_min_length(self):
        """``min_length`` above the amplicon length (4) finds no matches."""
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, min_length=5)

    def test_extract_reads_fail_max_length(self):
        """``max_length`` below the amplicon length (4) finds no matches."""
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, max_length=1)

    def test_extract_reads_fail_trim_left_entire_read(self):
        """``trim_left`` equal to the amplicon length finds no matches."""
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, trim_left=4)

    def test_extract_reads_fail_trim_right_entire_read(self):
        """``trim_right`` equal to the amplicon length finds no matches."""
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, trim_right=4)

    def test_extract_reads_fail_trim_both_entire_read(self):
        """Left and right trims summing to the amplicon length fail."""
        with self.assertRaisesRegex(RuntimeError, "No matches found"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, trim_left=2, trim_right=2)

    def test_extract_reads_fail_min_len_greater_than_trunc_len(self):
        """``trunc_len=1`` raises ``ValueError`` about the minimum length.

        NOTE(review): presumably the default ``min_length`` exceeds 1; the
        default is not visible in this file -- confirm against the action.
        """
        with self.assertRaisesRegex(ValueError, "minimum length setting"):
            extract_reads(
                self.sequences, f_primer=self.f_primer,
                r_primer=self.r_primer, trunc_len=1)