Skip to content

Commit b4b740a

Browse files
committed
added file_dict mixin
1 parent 4b9509a commit b4b740a

File tree

2 files changed

+105
-1
lines changed

2 files changed

+105
-1
lines changed

q2_types/kraken2/_formats.py

+99-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#
66
# The full license is in the file LICENSE, distributed with this software.
77
# ----------------------------------------------------------------------------
8+
from collections import defaultdict
89

910
import pandas as pd
1011
from pandas.core.dtypes.common import is_string_dtype
@@ -67,7 +68,104 @@ def _validate_(self, level):
6768
)
6869

6970

70-
class Kraken2ReportDirectoryFormat(model.DirectoryFormat):
71+
class FileDictMixin:
    def file_dict(self, relative=False, suffixes=None):
        """
        Map the contents of this directory to their file paths.

        Every entry found directly inside ``self.path`` is visited. A
        subdirectory contributes an outer key (the subdirectory name) whose
        value is a dictionary of ID -> filepath for the entries it contains.
        Any other entry is stored directly as ID -> filepath. The IDs and
        path strings are produced by the module-level ``_create_path``
        helper, which strips the given suffixes from the file names.

        Parameters
        ----------
        relative : bool
            Whether to return filepaths relative to the directory's
            location. Absolute filepaths are returned by default.
        suffixes : List
            Suffixes that should be removed from the file names to
            generate the IDs.

        Returns
        -------
        dict
            Mapping of ID -> filepath for files located directly in the
            directory, and of subdirectory name -> {ID: filepath} for
            subdirectories. Both levels of the dictionary are sorted
            alphabetically by key.
        """
        collected = defaultdict(dict)

        for entry in self.path.iterdir():
            if not entry.is_dir():
                # Plain file at the top level: ID -> filepath.
                location, key = _create_path(
                    path=entry,
                    relative=relative,
                    dir_format=self,
                    suffixes=suffixes,
                )
                collected[key] = str(location)
                continue

            # Subdirectory: gather its entries under the directory name.
            group = collected[entry.name]
            for child in entry.iterdir():
                location, key = _create_path(
                    path=child,
                    relative=relative,
                    dir_format=self,
                    suffixes=suffixes,
                )
                group[key] = str(location)
            collected[entry.name] = dict(sorted(group.items()))

        return dict(sorted(collected.items()))
123+
124+
125+
def _create_path(path, relative, dir_format, suffixes):
126+
"""
127+
This function processes the input file path to generate an absolute or
128+
relative path string and the ID derived from the file name. The ID is
129+
extracted by removing the one of the specified suffixes from the file
130+
name. If no suffixes are specified the ID is defined to be the filename.
131+
132+
Parameters:
133+
---------
134+
path : Path
135+
A Path object representing the file path to process.
136+
relative : bool
137+
A flag indicating whether the returned path should be relative
138+
to the directory formats path or absolute.
139+
dir_format : DirectoryFormat.
140+
Any object of class DirectoryFormat.
141+
142+
Returns:
143+
-------
144+
path_dict : str
145+
The full relative or absolut path to the file.
146+
_id : str
147+
The ID derived from the file name. ID will be "" if the filename
148+
consists only of the suffix.
149+
"""
150+
file_name = path.stem
151+
152+
_id = file_name
153+
154+
if suffixes:
155+
for suffix in suffixes:
156+
if file_name.endswith(suffix[1:]):
157+
_id = file_name[:-len(suffix)]
158+
break
159+
160+
path_dict = (
161+
path.absolute().relative_to(dir_format.path.absolute())
162+
if relative
163+
else path.absolute()
164+
)
165+
return str(path_dict), _id
166+
167+
168+
class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin):
71169
reports = model.FileCollection(
72170
r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat
73171
)

q2_types/kraken2/tests/test_formats.py

+6
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ def test_report_dirfmt_from_reads(self):
8787
fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
8888
fmt.validate()
8989

90+
def test_report_dirfmt_from_reads_dict(self):
    # Checks that file_dict() can be called on a report directory format
    # and returns a dictionary whose keys are in sorted order.
    dirpath = self.get_data_path('reports-reads')
    fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')

    obs = fmt.file_dict(suffixes=["_report"])

    # NOTE(review): the exact contents of the 'reports-reads' fixture are
    # not visible here, so only the shape of the result is asserted;
    # compare against the expected mapping once it is pinned down.
    self.assertIsInstance(obs, dict)
    self.assertEqual(list(obs), sorted(obs))
95+
9096
def test_report_dirfmt_from_mags(self):
9197
dirpath = self.get_data_path('reports-mags')
9298
fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')

0 commit comments

Comments
 (0)