6
6
# The full license is in the file LICENSE, distributed with this software.
7
7
# ----------------------------------------------------------------------------
8
8
import re
9
- from collections import defaultdict
10
9
11
10
import qiime2 .plugin .model as model
12
11
from qiime2 .plugin import ValidationError
13
12
13
+ from q2_types ._util import FileDictMixin
14
14
from q2_types .feature_data import DNAFASTAFormat , ProteinFASTAFormat
15
15
16
16
@@ -19,63 +19,18 @@ def _validate_(self, level):
19
19
pass
20
20
21
21
22
- class GenomeDataDirectoryFormat (model .DirectoryFormat ):
23
- def genome_dict (self , relative = False ):
24
- """
25
- For per sample directories it returns a mapping of sample id to
26
- another dictionary where keys represent the file name and values
27
- correspond to the filepath for each file.
28
- For files, it returns a mapping of file name to filepath for each file.
29
-
30
- Parameters
31
- ---------
32
- relative : bool
33
- Whether to return filepaths relative to the directory's location.
34
- Returns absolute filepaths by default.
35
-
36
- Returns
37
- -------
38
- dict
39
- Mapping of filename -> filepath as described above.
40
- Or mapping of sample id -> dict {filename: filepath} as
41
- described above.
42
- Both levels of the dictionary are sorted alphabetically by key.
43
- """
44
- ids = defaultdict (dict )
45
- for entry in self .path .iterdir ():
46
- if entry .is_dir ():
47
- sample_id = entry .name
48
- for path in entry .iterdir ():
49
- file_name = path .stem
50
- file_path = (
51
- path .absolute ().relative_to (self .path .absolute ())
52
- if relative else path .absolute ()
53
- )
54
- ids [sample_id ][file_name ] = str (file_path )
55
- ids [sample_id ] = dict (sorted (ids [sample_id ].items ()))
56
- else :
57
- file_name = entry .stem
58
- file_path = (
59
- entry .absolute ().relative_to (self .path .absolute ())
60
- if relative else entry .absolute ()
61
- )
62
- ids [file_name ] = str (file_path )
63
-
64
- return dict (sorted (ids .items ()))
65
-
66
-
67
- class GenesDirectoryFormat (GenomeDataDirectoryFormat ):
68
- genes = model .FileCollection (r'.+\.(fa|fna|fasta)$' ,
69
- format = DNAFASTAFormat )
22
+ class GenesDirectoryFormat (model .DirectoryFormat , FileDictMixin ):
23
+ pathspec = r'.+\.(fa|fna|fasta)$'
24
+ genes = model .FileCollection (pathspec , format = DNAFASTAFormat )
70
25
71
26
@genes .set_path_maker
72
27
def genes_path_maker (self , genome_id ):
73
28
return '%s.fasta' % genome_id
74
29
75
30
76
- class ProteinsDirectoryFormat (GenomeDataDirectoryFormat ):
77
- proteins = model . FileCollection ( r'.+\.(fa|faa|fasta)$' ,
78
- format = ProteinFASTAFormat )
31
+ class ProteinsDirectoryFormat (model . DirectoryFormat , FileDictMixin ):
32
+ pathspec = r'.+\.(fa|faa|fasta)$'
33
+ proteins = model . FileCollection ( pathspec , format = ProteinFASTAFormat )
79
34
80
35
@proteins .set_path_maker
81
36
def proteins_path_maker (self , genome_id ):
@@ -205,17 +160,18 @@ def _validate_(self, level):
205
160
f'{ line_number } ' ) from e
206
161
207
162
208
- class LociDirectoryFormat (GenomeDataDirectoryFormat ):
209
- loci = model . FileCollection ( r'.+\.gff$' ,
210
- format = GFF3Format )
163
+ class LociDirectoryFormat (model . DirectoryFormat , FileDictMixin ):
164
+ pathspec = r'.+\.gff$'
165
+ loci = model . FileCollection ( pathspec , format = GFF3Format )
211
166
212
167
@loci .set_path_maker
213
168
def loci_path_maker (self , genome_id ):
214
169
return '%s.gff' % genome_id
215
170
216
171
217
- class GenomeSequencesDirectoryFormat (GenomeDataDirectoryFormat ):
218
- genomes = model .FileCollection (r'.+\.(fasta|fa)$' , format = DNAFASTAFormat )
172
+ class GenomeSequencesDirectoryFormat (model .DirectoryFormat , FileDictMixin ):
173
+ pathspec = r'.+\.(fasta|fa)$'
174
+ genomes = model .FileCollection (pathspec , format = DNAFASTAFormat )
219
175
220
176
@genomes .set_path_maker
221
177
def genomes_path_maker (self , genome_id ):
0 commit comments