From b4b740a0e5e6ecd3022498896dab44e45cf306c2 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 15 Oct 2024 10:00:16 +0200 Subject: [PATCH 01/22] added file_dict mixin --- q2_types/kraken2/_formats.py | 100 ++++++++++++++++++++++++- q2_types/kraken2/tests/test_formats.py | 6 ++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py index 380bbd3..d634314 100644 --- a/q2_types/kraken2/_formats.py +++ b/q2_types/kraken2/_formats.py @@ -5,6 +5,7 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +from collections import defaultdict import pandas as pd from pandas.core.dtypes.common import is_string_dtype @@ -67,7 +68,104 @@ def _validate_(self, level): ) -class Kraken2ReportDirectoryFormat(model.DirectoryFormat): +class FileDictMixin: + def file_dict(self, relative=False, suffixes=None): + """ + For per sample directories it returns a mapping of sample id to + another dictionary where keys represent the file name and values + correspond to the filepath for each file. + For files, it returns a mapping of file name to filepath for each + file. The specified suffixes are removed from filenames. + + Parameters + --------- + relative : bool + Whether to return filepaths relative to the directory's location. + Returns absolute filepaths by default. + suffixes : List + A list of suffixes that should be removed from the filenames to + generate the ID. + + Returns + ------- + dict + Mapping of filename -> filepath as described above. + Or mapping of sample id -> dict {filename: filepath} as + described above. + Both levels of the dictionary are sorted alphabetically by key. + """ + ids = defaultdict(dict) + for entry in self.path.iterdir(): + if entry.is_dir(): + outer_id = entry.name + for path in entry.iterdir(): + file_path, inner_id = _create_path( + path=path, + relative=relative, + dir_format=self, + suffixes=suffixes + ) + + ids[outer_id][inner_id] = str(file_path) + ids[outer_id] = dict(sorted(ids[outer_id].items())) + else: + file_path, inner_id = _create_path( + path=entry, + relative=relative, + dir_format=self, + suffixes=suffixes + + ) + + ids[inner_id] = str(file_path) + + return dict(sorted(ids.items())) + + +def _create_path(path, relative, dir_format, suffixes): + """ + This function processes the input file path to generate an absolute or + relative path string and the ID derived from the file name. The ID is + extracted by removing the one of the specified suffixes from the file + name. If no suffixes are specified the ID is defined to be the filename. + + Parameters: + --------- + path : Path + A Path object representing the file path to process. + relative : bool + A flag indicating whether the returned path should be relative + to the directory formats path or absolute. + dir_format : DirectoryFormat. + Any object of class DirectoryFormat. + + Returns: + ------- + path_dict : str + The full relative or absolut path to the file. + _id : str + The ID derived from the file name. ID will be "" if the filename + consists only of the suffix. + """ + file_name = path.stem + + _id = file_name + + if suffixes: + for suffix in suffixes: + if file_name.endswith(suffix[1:]): + _id = file_name[:-len(suffix)] + break + + path_dict = ( + path.absolute().relative_to(dir_format.path.absolute()) + if relative + else path.absolute() + ) + return str(path_dict), _id + + +class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin): reports = model.FileCollection( r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat ) diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py index 038eede..4b0157b 100644 --- a/q2_types/kraken2/tests/test_formats.py +++ b/q2_types/kraken2/tests/test_formats.py @@ -87,6 +87,12 @@ def test_report_dirfmt_from_reads(self): fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') fmt.validate() + def test_report_dirfmt_from_reads_dict(self): + dirpath = self.get_data_path('reports-reads') + fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') + dict = fmt.file_dict(suffixes=["_report"]) + print(fmt) + def test_report_dirfmt_from_mags(self): dirpath = self.get_data_path('reports-mags') fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') From 03efc075af2fcc0629b9f35f01070dbacab311f5 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 15 Oct 2024 10:36:12 +0200 Subject: [PATCH 02/22] moved mixin to util --- q2_types/_util.py | 98 ++++++++++++++++++++++++ q2_types/kraken2/_formats.py | 101 +------------------------ q2_types/kraken2/tests/test_formats.py | 6 -- 3 files changed, 100 insertions(+), 105 deletions(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index 9f633b7..0349450 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -8,6 +8,7 @@ import gzip import itertools import warnings +from collections import defaultdict from typing import List import skbio @@ -138,3 +139,100 @@ def _validate_mag_ids( "correctly. Printing duplicate MAG IDs: " f"{set(duplicates)}" ) + + +class FileDictMixin: + def file_dict(self, relative=False, suffixes=None): + """ + For per sample directories it returns a mapping of sample id to + another dictionary where keys represent the file name and values + correspond to the filepath for each file. + For files, it returns a mapping of file name to filepath for each + file. The specified suffixes are removed from filenames. + + Parameters + --------- + relative : bool + Whether to return filepaths relative to the directory's location. + Returns absolute filepaths by default. + suffixes : List + A list of suffixes that should be removed from the filenames to + generate the ID. + + Returns + ------- + dict + Mapping of filename -> filepath as described above. + Or mapping of sample id -> dict {filename: filepath} as + described above. + Both levels of the dictionary are sorted alphabetically by key. + """ + ids = defaultdict(dict) + for entry in self.path.iterdir(): + if entry.is_dir(): + outer_id = entry.name + for path in entry.iterdir(): + file_path, inner_id = _create_path( + path=path, + relative=relative, + dir_format=self, + suffixes=suffixes + ) + + ids[outer_id][inner_id] = str(file_path) + ids[outer_id] = dict(sorted(ids[outer_id].items())) + else: + file_path, inner_id = _create_path( + path=entry, + relative=relative, + dir_format=self, + suffixes=suffixes + + ) + + ids[inner_id] = str(file_path) + + return dict(sorted(ids.items())) + + +def _create_path(path, relative, dir_format, suffixes): + """ + This function processes the input file path to generate an absolute or + relative path string and the ID derived from the file name. The ID is + extracted by removing the one of the specified suffixes from the file + name. If no suffixes are specified the ID is defined to be the filename. + + Parameters: + --------- + path : Path + A Path object representing the file path to process. + relative : bool + A flag indicating whether the returned path should be relative + to the directory formats path or absolute. + dir_format : DirectoryFormat. + Any object of class DirectoryFormat. + + Returns: + ------- + path_dict : str + The full relative or absolut path to the file. + _id : str + The ID derived from the file name. ID will be "" if the filename + consists only of the suffix. + """ + file_name = path.stem + + _id = file_name + + if suffixes: + for suffix in suffixes: + if file_name.endswith(suffix[1:]): + _id = file_name[:-len(suffix)] + break + + path_dict = ( + path.absolute().relative_to(dir_format.path.absolute()) + if relative + else path.absolute() + ) + return str(path_dict), _id \ No newline at end of file diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py index d634314..6d6ad74 100644 --- a/q2_types/kraken2/_formats.py +++ b/q2_types/kraken2/_formats.py @@ -5,12 +5,12 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from collections import defaultdict - import pandas as pd from pandas.core.dtypes.common import is_string_dtype from qiime2.plugin import model, ValidationError +from q2_types._util import FileDictMixin + class Kraken2ReportFormat(model.TextFileFormat): MEASURE_COLUMNS = { @@ -68,103 +68,6 @@ def _validate_(self, level): ) -class FileDictMixin: - def file_dict(self, relative=False, suffixes=None): - """ - For per sample directories it returns a mapping of sample id to - another dictionary where keys represent the file name and values - correspond to the filepath for each file. - For files, it returns a mapping of file name to filepath for each - file. The specified suffixes are removed from filenames. - - Parameters - --------- - relative : bool - Whether to return filepaths relative to the directory's location. - Returns absolute filepaths by default. - suffixes : List - A list of suffixes that should be removed from the filenames to - generate the ID. - - Returns - ------- - dict - Mapping of filename -> filepath as described above. - Or mapping of sample id -> dict {filename: filepath} as - described above. - Both levels of the dictionary are sorted alphabetically by key. - """ - ids = defaultdict(dict) - for entry in self.path.iterdir(): - if entry.is_dir(): - outer_id = entry.name - for path in entry.iterdir(): - file_path, inner_id = _create_path( - path=path, - relative=relative, - dir_format=self, - suffixes=suffixes - ) - - ids[outer_id][inner_id] = str(file_path) - ids[outer_id] = dict(sorted(ids[outer_id].items())) - else: - file_path, inner_id = _create_path( - path=entry, - relative=relative, - dir_format=self, - suffixes=suffixes - - ) - - ids[inner_id] = str(file_path) - - return dict(sorted(ids.items())) - - -def _create_path(path, relative, dir_format, suffixes): - """ - This function processes the input file path to generate an absolute or - relative path string and the ID derived from the file name. The ID is - extracted by removing the one of the specified suffixes from the file - name. If no suffixes are specified the ID is defined to be the filename. - - Parameters: - --------- - path : Path - A Path object representing the file path to process. - relative : bool - A flag indicating whether the returned path should be relative - to the directory formats path or absolute. - dir_format : DirectoryFormat. - Any object of class DirectoryFormat. - - Returns: - ------- - path_dict : str - The full relative or absolut path to the file. - _id : str - The ID derived from the file name. ID will be "" if the filename - consists only of the suffix. - """ - file_name = path.stem - - _id = file_name - - if suffixes: - for suffix in suffixes: - if file_name.endswith(suffix[1:]): - _id = file_name[:-len(suffix)] - break - - path_dict = ( - path.absolute().relative_to(dir_format.path.absolute()) - if relative - else path.absolute() - ) - return str(path_dict), _id - - class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin): reports = model.FileCollection( r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py index 4b0157b..038eede 100644 --- a/q2_types/kraken2/tests/test_formats.py +++ b/q2_types/kraken2/tests/test_formats.py @@ -87,12 +87,6 @@ def test_report_dirfmt_from_reads(self): fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') fmt.validate() - def test_report_dirfmt_from_reads_dict(self): - dirpath = self.get_data_path('reports-reads') - fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') - dict = fmt.file_dict(suffixes=["_report"]) - print(fmt) - def test_report_dirfmt_from_mags(self): dirpath = self.get_data_path('reports-mags') fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') From b8b270aafb0079f23dd9405d594e5c5ab3e344a7 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 15 Oct 2024 17:36:19 +0200 Subject: [PATCH 03/22] added tests for file_ dict --- q2_types/_util.py | 6 +- .../tests/data/not_per_sample/id1_suffix1.txt | 0 .../tests/data/not_per_sample/id2_suffix2.txt | 0 .../data/per_sample/sample1/id1_suffix.txt | 0 .../tests/data/per_sample/sample2/suffix.txt | 0 q2_types/tests/test_util.py | 130 +++++++++++++++++- 6 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 q2_types/tests/data/not_per_sample/id1_suffix1.txt create mode 100644 q2_types/tests/data/not_per_sample/id2_suffix2.txt create mode 100644 q2_types/tests/data/per_sample/sample1/id1_suffix.txt create mode 100644 q2_types/tests/data/per_sample/sample2/suffix.txt diff --git a/q2_types/_util.py b/q2_types/_util.py index 0349450..9b8ddb7 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -172,7 +172,7 @@ def file_dict(self, relative=False, suffixes=None): if entry.is_dir(): outer_id = entry.name for path in entry.iterdir(): - file_path, inner_id = _create_path( + file_path, inner_id = _process_path( path=path, relative=relative, dir_format=self, @@ -182,7 +182,7 @@ def file_dict(self, relative=False, suffixes=None): ids[outer_id][inner_id] = str(file_path) ids[outer_id] = dict(sorted(ids[outer_id].items())) else: - file_path, inner_id = _create_path( + file_path, inner_id = _process_path( path=entry, relative=relative, dir_format=self, @@ -195,7 +195,7 @@ def file_dict(self, relative=False, suffixes=None): return dict(sorted(ids.items())) -def _create_path(path, relative, dir_format, suffixes): +def _process_path(path, relative, dir_format, suffixes): """ This function processes the input file path to generate an absolute or relative path string and the ID derived from the file name. The ID is diff --git a/q2_types/tests/data/not_per_sample/id1_suffix1.txt b/q2_types/tests/data/not_per_sample/id1_suffix1.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_types/tests/data/not_per_sample/id2_suffix2.txt b/q2_types/tests/data/not_per_sample/id2_suffix2.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_types/tests/data/per_sample/sample1/id1_suffix.txt b/q2_types/tests/data/per_sample/sample1/id1_suffix.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_types/tests/data/per_sample/sample2/suffix.txt b/q2_types/tests/data/per_sample/sample2/suffix.txt new file mode 100644 index 0000000..e69de29 diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index edaa929..57b750b 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -5,9 +5,14 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import os +from pathlib import Path + +from qiime2.plugin import model from qiime2.plugin.testing import TestPluginBase -from q2_types._util import _validate_num_partitions, _validate_mag_ids +from q2_types._util import _validate_num_partitions, _validate_mag_ids, \ + FileDictMixin, _process_path class TestUtil(TestPluginBase): @@ -46,3 +51,126 @@ def test_validate_mag_ids_invalid(self): 6, [(0, "a"), (0, "a"), (0, "c"), (0, "d"), (0, "e"), (0, "f")] ) + + def test_file_dict_mixin(self): + TestClass = type( + f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}", + (FileDictMixin, model.DirectoryFormat), + {} + ) + fmt = TestClass(self.get_data_path("per_sample"), mode='r') + + obs = fmt.file_dict(suffixes=["_suffix"]) + exp = { + "sample1": { + "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"), + }, + "sample2": { + "": os.path.join(str(fmt), "sample2", "suffix.txt"), + }, + } + self.assertDictEqual(obs, exp) + + obs = fmt.file_dict(suffixes=["_suffix"], relative=True) + exp = { + "sample1": { + "id1": "sample1/id1_suffix.txt", + }, + "sample2": { + "": "sample2/suffix.txt", + }, + } + self.assertDictEqual(obs, exp) + + def test_genes_dirfmt_genome_dict(self): + TestClass = type( + f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}", + (FileDictMixin, model.DirectoryFormat), + {} + ) + fmt = TestClass(self.get_data_path("not_per_sample"), mode='r') + + obs = fmt.file_dict(suffixes=["_suffix1", "_suffix2"]) + exp = { + "id1": os.path.join(str(fmt), "id1_suffix1.txt"), + "id2": os.path.join(str(fmt), "id2_suffix2.txt"), + } + self.assertDictEqual(obs, exp) + + obs = fmt.file_dict( + suffixes=["_suffix1", "_suffix2"], + relative=True + ) + exp = { + "id1": "id1_suffix1.txt", + "id2": "id2_suffix2.txt", + } + self.assertDictEqual(obs, exp) + + +class TestProcessPath(TestPluginBase): + package = "q2_types.tests" + + def setUp(self): + super().setUp() + self.dir_fmt = model.DirectoryFormat() + + def test_process_path_with_suffix(self): + # Test when the file name ends with a given suffix + path = Path(self.dir_fmt.path / "sample_id_suffix1.txt") + suffixes = ["_suffix1", "_suffix2"] + + result_path, result_id = _process_path( + path, + relative=True, + dir_format=self.dir_fmt, + suffixes=suffixes + ) + + self.assertEqual(result_id, "sample_id") + self.assertEqual(result_path, "sample_id_suffix1.txt") + + def test_process_path_without_suffix(self): + # Test when no suffix matches the file name + path = Path(self.dir_fmt.path / "sample_id.txt") + suffixes = ["_suffix1", "_suffix2"] + + result_path, result_id = _process_path( + path, + relative=True, + dir_format=self.dir_fmt, + suffixes=suffixes + ) + + self.assertEqual(result_id, "sample_id") + self.assertEqual(result_path, "sample_id.txt") + + def test_process_path_absolute(self): + # Test when the relative flag is False (absolute path is returned) + path = Path(self.dir_fmt.path / "sample_id_suffix2.txt") + suffixes = ["_suffix1", "_suffix2"] + + result_path, result_id = _process_path( + path, + relative=False, + dir_format=self.dir_fmt, + suffixes=suffixes + ) + + self.assertEqual(result_id, "sample_id") + self.assertEqual(result_path, str(path.absolute())) + + def test_process_path_only_suffix(self): + # Test when the file name consists only of the suffix + path = Path(self.dir_fmt.path / "suffix1.txt") + suffixes = ["_suffix1", "_suffix2"] + + result_path, result_id = _process_path( + path, + relative=True, + dir_format=self.dir_fmt, + suffixes=suffixes + ) + + self.assertEqual(result_id, "") + self.assertEqual(result_path, "suffix1.txt") From b17c931c5a9e3020b3724d9c285064e152dd6070 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 15 Oct 2024 17:37:53 +0200 Subject: [PATCH 04/22] added to package dataa --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a71fae3..7a50367 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ }, package_data={ 'q2_types': ['citations.bib'], - 'q2_types.tests': ['data/*'], + 'q2_types.tests': ['data/*', 'data/*/*', 'data/*/*/*'], 'q2_types.bowtie2': ['citations.bib'], 'q2_types.distance_matrix.tests': ['data/*'], 'q2_types.feature_data.tests': ['data/*', From 65fe610c9c256f5908df812265c33a2d71cc500e Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 16 Oct 2024 15:19:36 +0200 Subject: [PATCH 05/22] added separator without tests --- q2_types/_util.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index 9b8ddb7..c4e6918 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -142,7 +142,7 @@ def _validate_mag_ids( class FileDictMixin: - def file_dict(self, relative=False, suffixes=None): + def file_dict(self, relative=False, suffixes=None, separator=None): """ For per sample directories it returns a mapping of sample id to another dictionary where keys represent the file name and values @@ -176,7 +176,8 @@ def file_dict(self, relative=False, suffixes=None): path=path, relative=relative, dir_format=self, - suffixes=suffixes + suffixes=suffixes, + separator=separator ) ids[outer_id][inner_id] = str(file_path) @@ -186,8 +187,8 @@ def file_dict(self, relative=False, suffixes=None): path=entry, relative=relative, dir_format=self, - suffixes=suffixes - + suffixes=suffixes, + separator=separator ) ids[inner_id] = str(file_path) @@ -195,7 +196,7 @@ def file_dict(self, relative=False, suffixes=None): return dict(sorted(ids.items())) -def _process_path(path, relative, dir_format, suffixes): +def _process_path(path, relative, dir_format, suffixes, separator): """ This function processes the input file path to generate an absolute or relative path string and the ID derived from the file name. The ID is @@ -226,8 +227,8 @@ def _process_path(path, relative, dir_format, suffixes): if suffixes: for suffix in suffixes: - if file_name.endswith(suffix[1:]): - _id = file_name[:-len(suffix)] + if file_name.endswith(suffix): + _id = file_name[:-len(suffix)+len(separator)] break path_dict = ( From 688dd944f146406a657cf6b049be99db8dcaa378 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 10:55:58 +0200 Subject: [PATCH 06/22] fixed bug slicing --- q2_types/_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index c4e6918..e9cc678 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -228,7 +228,7 @@ def _process_path(path, relative, dir_format, suffixes, separator): if suffixes: for suffix in suffixes: if file_name.endswith(suffix): - _id = file_name[:-len(suffix)+len(separator)] + _id = file_name[:-(len(suffix)+len(separator))] break path_dict = ( From a9cc86367c0ba65bca0c67f658e9076483a2ea15 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 11:18:11 +0200 Subject: [PATCH 07/22] removed separator --- q2_types/_util.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index e9cc678..8383d40 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -142,7 +142,7 @@ def _validate_mag_ids( class FileDictMixin: - def file_dict(self, relative=False, suffixes=None, separator=None): + def file_dict(self, relative=False, suffixes=None): """ For per sample directories it returns a mapping of sample id to another dictionary where keys represent the file name and values @@ -177,7 +177,6 @@ def file_dict(self, relative=False, suffixes=None, separator=None): relative=relative, dir_format=self, suffixes=suffixes, - separator=separator ) ids[outer_id][inner_id] = str(file_path) @@ -188,7 +187,6 @@ def file_dict(self, relative=False, suffixes=None, separator=None): relative=relative, dir_format=self, suffixes=suffixes, - separator=separator ) ids[inner_id] = str(file_path) @@ -196,7 +194,7 @@ def file_dict(self, relative=False, suffixes=None, separator=None): return dict(sorted(ids.items())) -def _process_path(path, relative, dir_format, suffixes, separator): +def _process_path(path, relative, dir_format, suffixes): """ This function processes the input file path to generate an absolute or relative path string and the ID derived from the file name. The ID is @@ -228,7 +226,7 @@ def _process_path(path, relative, dir_format, suffixes, separator): if suffixes: for suffix in suffixes: if file_name.endswith(suffix): - _id = file_name[:-(len(suffix)+len(separator))] + _id = file_name[:-len(suffix)] break path_dict = ( From 7ad2f0ddc8178f0834b6dd207351ece118f132e7 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 13:14:05 +0200 Subject: [PATCH 08/22] removed option for id to be empty --- .../sample2/{suffix.txt => id2_suffix.txt} | 0 q2_types/tests/test_util.py | 23 +++++-------------- 2 files changed, 6 insertions(+), 17 deletions(-) rename q2_types/tests/data/per_sample/sample2/{suffix.txt => id2_suffix.txt} (100%) diff --git a/q2_types/tests/data/per_sample/sample2/suffix.txt b/q2_types/tests/data/per_sample/sample2/id2_suffix.txt similarity index 100% rename from q2_types/tests/data/per_sample/sample2/suffix.txt rename to q2_types/tests/data/per_sample/sample2/id2_suffix.txt diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 57b750b..7e473a3 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -52,6 +52,10 @@ def test_validate_mag_ids_invalid(self): [(0, "a"), (0, "a"), (0, "c"), (0, "d"), (0, "e"), (0, "f")] ) + +class TestFileDictMixing(TestPluginBase): + package = "q2_types.tests" + def test_file_dict_mixin(self): TestClass = type( f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}", @@ -66,7 +70,7 @@ def test_file_dict_mixin(self): "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"), }, "sample2": { - "": os.path.join(str(fmt), "sample2", "suffix.txt"), + "id2": os.path.join(str(fmt), "sample2", "id2_suffix.txt"), }, } self.assertDictEqual(obs, exp) @@ -77,7 +81,7 @@ def test_file_dict_mixin(self): "id1": "sample1/id1_suffix.txt", }, "sample2": { - "": "sample2/suffix.txt", + "id2": "sample2/id2_suffix.txt", }, } self.assertDictEqual(obs, exp) @@ -159,18 +163,3 @@ def test_process_path_absolute(self): self.assertEqual(result_id, "sample_id") self.assertEqual(result_path, str(path.absolute())) - - def test_process_path_only_suffix(self): - # Test when the file name consists only of the suffix - path = Path(self.dir_fmt.path / "suffix1.txt") - suffixes = ["_suffix1", "_suffix2"] - - result_path, result_id = _process_path( - path, - relative=True, - dir_format=self.dir_fmt, - suffixes=suffixes - ) - - self.assertEqual(result_id, "") - self.assertEqual(result_path, "suffix1.txt") From 9b3d30fe59fb27d07c4491d23920ba109ab08d40 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 13:16:06 +0200 Subject: [PATCH 09/22] added mixing to outputs format --- q2_types/kraken2/_formats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py index 6d6ad74..38c1583 100644 --- a/q2_types/kraken2/_formats.py +++ b/q2_types/kraken2/_formats.py @@ -147,7 +147,7 @@ def _validate_(self, level): ) -class Kraken2OutputDirectoryFormat(model.DirectoryFormat): +class Kraken2OutputDirectoryFormat(model.DirectoryFormat, FileDictMixin): reports = model.FileCollection( r'.+output\.(txt|tsv)$', format=Kraken2OutputFormat ) From e5547df33f7befd39f5f15bb364ad8bc63b60785 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 13:57:31 +0200 Subject: [PATCH 10/22] added pathspec check --- q2_types/_util.py | 47 +++++++++++--------- q2_types/tests/data/not_per_sample/some_file | 0 q2_types/tests/data/per_sample/some_file | 0 q2_types/tests/test_util.py | 18 ++++---- 4 files changed, 36 insertions(+), 29 deletions(-) create mode 100644 q2_types/tests/data/not_per_sample/some_file create mode 100644 q2_types/tests/data/per_sample/some_file diff --git a/q2_types/_util.py b/q2_types/_util.py index 8383d40..e05ff73 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -7,6 +7,7 @@ # ---------------------------------------------------------------------------- import gzip import itertools +import re import warnings from collections import defaultdict from typing import List @@ -146,9 +147,10 @@ def file_dict(self, relative=False, suffixes=None): """ For per sample directories it returns a mapping of sample id to another dictionary where keys represent the file name and values - correspond to the filepath for each file. + correspond to the filepath for each file matching the pathspec. For files, it returns a mapping of file name to filepath for each - file. The specified suffixes are removed from filenames. + file matching the pathspec. The specified suffixes are removed + from filenames. Parameters --------- @@ -167,29 +169,34 @@ def file_dict(self, relative=False, suffixes=None): described above. Both levels of the dictionary are sorted alphabetically by key. """ + file_pattern = re.compile(self.pathspec) ids = defaultdict(dict) for entry in self.path.iterdir(): if entry.is_dir(): outer_id = entry.name for path in entry.iterdir(): + if file_pattern.match(path.name): + + file_path, inner_id = _process_path( + path=path, + relative=relative, + dir_format=self, + suffixes=suffixes, + ) + + ids[outer_id][inner_id] = str(file_path) + ids[outer_id] = dict(sorted(ids[outer_id].items())) + else: + if file_pattern.match(entry.name): + file_path, inner_id = _process_path( - path=path, + path=entry, relative=relative, dir_format=self, suffixes=suffixes, ) - ids[outer_id][inner_id] = str(file_path) - ids[outer_id] = dict(sorted(ids[outer_id].items())) - else: - file_path, inner_id = _process_path( - path=entry, - relative=relative, - dir_format=self, - suffixes=suffixes, - ) - - ids[inner_id] = str(file_path) + ids[inner_id] = str(file_path) return dict(sorted(ids.items())) @@ -198,7 +205,7 @@ def _process_path(path, relative, dir_format, suffixes): """ This function processes the input file path to generate an absolute or relative path string and the ID derived from the file name. The ID is - extracted by removing the one of the specified suffixes from the file + extracted by removing the one of the specified suffixes from the file name. If no suffixes are specified the ID is defined to be the filename. Parameters: @@ -208,12 +215,12 @@ def _process_path(path, relative, dir_format, suffixes): relative : bool A flag indicating whether the returned path should be relative to the directory formats path or absolute. - dir_format : DirectoryFormat. - Any object of class DirectoryFormat. + dir_format : model.DirectoryFormat. + Any object of class model.DirectoryFormat. Returns: ------- - path_dict : str + processed_path : str The full relative or absolut path to the file. _id : str The ID derived from the file name. ID will be "" if the filename @@ -229,9 +236,9 @@ def _process_path(path, relative, dir_format, suffixes): _id = file_name[:-len(suffix)] break - path_dict = ( + processed_path = ( path.absolute().relative_to(dir_format.path.absolute()) if relative else path.absolute() ) - return str(path_dict), _id \ No newline at end of file + return str(processed_path), _id \ No newline at end of file diff --git a/q2_types/tests/data/not_per_sample/some_file b/q2_types/tests/data/not_per_sample/some_file new file mode 100644 index 0000000..e69de29 diff --git a/q2_types/tests/data/per_sample/some_file b/q2_types/tests/data/per_sample/some_file new file mode 100644 index 0000000..e69de29 diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 7e473a3..772121d 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -56,13 +56,18 @@ def test_validate_mag_ids_invalid(self): class TestFileDictMixing(TestPluginBase): package = "q2_types.tests" - def test_file_dict_mixin(self): - TestClass = type( + def setUp(self): + super().setUp() + + self.TestClass = type( f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}", (FileDictMixin, model.DirectoryFormat), {} ) - fmt = TestClass(self.get_data_path("per_sample"), mode='r') + self.TestClass.pathspec = r'.+\.(txt|tsv)$' + + def test_file_dict_mixin(self): + fmt = self.TestClass(self.get_data_path("per_sample"), mode='r') obs = fmt.file_dict(suffixes=["_suffix"]) exp = { @@ -87,12 +92,7 @@ def test_file_dict_mixin(self): self.assertDictEqual(obs, exp) def test_genes_dirfmt_genome_dict(self): - TestClass = type( - f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}", - (FileDictMixin, model.DirectoryFormat), - {} - ) - fmt = TestClass(self.get_data_path("not_per_sample"), mode='r') + fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r') obs = fmt.file_dict(suffixes=["_suffix1", "_suffix2"]) exp = { From daf2866758a8eb41a114f975d905b84989c6b544 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 13:58:56 +0200 Subject: [PATCH 11/22] lint --- q2_types/_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index e05ff73..58de578 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -241,4 +241,4 @@ def _process_path(path, relative, dir_format, suffixes): if relative else path.absolute() ) - return str(processed_path), _id \ No newline at end of file + return str(processed_path), _id From 46958562e726ee5aa6ff5c0090468efa8af05453 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 14:32:23 +0200 Subject: [PATCH 12/22] added pathspec to kraken formats --- q2_types/kraken2/_formats.py | 10 ++++------ q2_types/per_sample_sequences/_formats.py | 5 +++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py index 38c1583..f406ac4 100644 --- a/q2_types/kraken2/_formats.py +++ b/q2_types/kraken2/_formats.py @@ -69,9 +69,8 @@ def _validate_(self, level): class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin): - reports = model.FileCollection( - r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat - ) + pathspec = r'.+report\.(txt|tsv)$' + reports = model.FileCollection(pathspec, format=Kraken2ReportFormat) @reports.set_path_maker def reports_path_maker(self, sample_id, mag_id=None): @@ -148,9 +147,8 @@ def _validate_(self, level): class Kraken2OutputDirectoryFormat(model.DirectoryFormat, FileDictMixin): - reports = model.FileCollection( - r'.+output\.(txt|tsv)$', format=Kraken2OutputFormat - ) + pathspec = r'.+output\.(txt|tsv)$' + reports = model.FileCollection(pathspec, format=Kraken2OutputFormat) @reports.set_path_maker def reports_path_maker(self, sample_id, mag_id=None): diff --git a/q2_types/per_sample_sequences/_formats.py b/q2_types/per_sample_sequences/_formats.py index 17bb3a4..4973821 100644 --- a/q2_types/per_sample_sequences/_formats.py +++ b/q2_types/per_sample_sequences/_formats.py @@ -25,7 +25,7 @@ from q2_types.bowtie2 import Bowtie2IndexDirFmt from q2_types.feature_data import DNAFASTAFormat from ._util import _parse_sequence_filename, _manifest_to_df -from .._util import FastqGzFormat +from .._util import FastqGzFormat, FileDictMixin class FastqAbsolutePathManifestFormatV2(model.TextFileFormat): @@ -536,7 +536,8 @@ def _validate_(self, level): class MultiFASTADirectoryFormat(MultiDirValidationMixin, - model.DirectoryFormat): + model.DirectoryFormat, + FileDictMixin): pathspec = r'.+\.(fa|fasta)$' sequences = model.FileCollection(pathspec, format=DNAFASTAFormat) From 2a8955605a4895e369f0d0dade3988eb0a6d195d Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 15:30:49 +0200 Subject: [PATCH 13/22] removed genomedirectory format and fixed pathspec error in proteins --- q2_types/genome_data/__init__.py | 3 +- q2_types/genome_data/_formats.py | 69 ++++------------------ q2_types/genome_data/tests/test_formats.py | 18 +++--- 3 files changed, 23 insertions(+), 67 deletions(-) diff --git a/q2_types/genome_data/__init__.py b/q2_types/genome_data/__init__.py index 40283a8..1de55bd 100644 --- a/q2_types/genome_data/__init__.py +++ b/q2_types/genome_data/__init__.py @@ -11,7 +11,6 @@ GenesDirectoryFormat, ProteinsDirectoryFormat, LociDirectoryFormat, GFF3Format, OrthologFileFmt, SeedOrthologDirFmt, GenomeSequencesDirectoryFormat, OrthologAnnotationDirFmt, - GenomeDataDirectoryFormat, ) from ._objects import IntervalMetadataIterator from ._types import ( @@ -25,6 +24,6 @@ 'GenesDirectoryFormat', 'ProteinsDirectoryFormat', 'LociDirectoryFormat', 'IntervalMetadataIterator', 'OrthologFileFmt', 'Orthologs', 'SeedOrthologDirFmt', 'GenomeSequencesDirectoryFormat', 'DNASequence', - 'OrthologAnnotationDirFmt', 'NOG', 'GenomeDataDirectoryFormat', + 'OrthologAnnotationDirFmt', 'NOG', 'collate_orthologs', 'partition_orthologs', "collate_ortholog_annotations" ] diff --git a/q2_types/genome_data/_formats.py b/q2_types/genome_data/_formats.py index 15bbaf7..456ebe6 100644 --- a/q2_types/genome_data/_formats.py +++ b/q2_types/genome_data/_formats.py @@ -11,6 +11,7 @@ import qiime2.plugin.model as model from qiime2.plugin import ValidationError +from q2_types._util import FileDictMixin from q2_types.feature_data import DNAFASTAFormat, ProteinFASTAFormat @@ -19,63 +20,18 @@ def _validate_(self, level): pass -class GenomeDataDirectoryFormat(model.DirectoryFormat): - def genome_dict(self, relative=False): - """ - For per sample directories it returns a mapping of sample id to - another dictionary where keys represent the file name and values - correspond to the filepath for each file. - For files, it returns a mapping of file name to filepath for each file. - - Parameters - --------- - relative : bool - Whether to return filepaths relative to the directory's location. - Returns absolute filepaths by default. - - Returns - ------- - dict - Mapping of filename -> filepath as described above. - Or mapping of sample id -> dict {filename: filepath} as - described above. - Both levels of the dictionary are sorted alphabetically by key. - """ - ids = defaultdict(dict) - for entry in self.path.iterdir(): - if entry.is_dir(): - sample_id = entry.name - for path in entry.iterdir(): - file_name = path.stem - file_path = ( - path.absolute().relative_to(self.path.absolute()) - if relative else path.absolute() - ) - ids[sample_id][file_name] = str(file_path) - ids[sample_id] = dict(sorted(ids[sample_id].items())) - else: - file_name = entry.stem - file_path = ( - entry.absolute().relative_to(self.path.absolute()) - if relative else entry.absolute() - ) - ids[file_name] = str(file_path) - - return dict(sorted(ids.items())) - - -class GenesDirectoryFormat(GenomeDataDirectoryFormat): - genes = model.FileCollection(r'.+\.(fa|fna|fasta)$', - format=DNAFASTAFormat) +class GenesDirectoryFormat(model.DirectoryFormat, FileDictMixin): + pathspec = r'.+\.(fa|fna|fasta)$' + genes = model.FileCollection(pathspec, format=DNAFASTAFormat) @genes.set_path_maker def genes_path_maker(self, genome_id): return '%s.fasta' % genome_id -class ProteinsDirectoryFormat(GenomeDataDirectoryFormat): - proteins = model.FileCollection(r'.+\.(fa|faa|fasta)$', - format=ProteinFASTAFormat) +class ProteinsDirectoryFormat(model.DirectoryFormat, FileDictMixin): + pathspec = r'.+\.(fa|faa|fasta)$' + proteins = model.FileCollection(pathspec, format=ProteinFASTAFormat) @proteins.set_path_maker def proteins_path_maker(self, genome_id): @@ -205,17 +161,18 @@ def _validate_(self, level): f'{line_number}') from e -class LociDirectoryFormat(GenomeDataDirectoryFormat): - loci = model.FileCollection(r'.+\.gff$', - format=GFF3Format) +class LociDirectoryFormat(model.DirectoryFormat, FileDictMixin): + pathspec = r'.+\.gff$' + loci = model.FileCollection(pathspec, format=GFF3Format) @loci.set_path_maker def loci_path_maker(self, genome_id): return '%s.gff' % genome_id -class GenomeSequencesDirectoryFormat(GenomeDataDirectoryFormat): - genomes = model.FileCollection(r'.+\.(fasta|fa)$', format=DNAFASTAFormat) +class GenomeSequencesDirectoryFormat(model.DirectoryFormat, FileDictMixin): + pathspec = r'.+\.(fasta|fa)$' + genomes = model.FileCollection(pathspec, format=DNAFASTAFormat) @genomes.set_path_maker def genomes_path_maker(self, genome_id): diff --git a/q2_types/genome_data/tests/test_formats.py b/q2_types/genome_data/tests/test_formats.py index 0b6e3c9..9f37403 100644 --- a/q2_types/genome_data/tests/test_formats.py +++ b/q2_types/genome_data/tests/test_formats.py @@ -180,17 +180,17 @@ def test_ortholog_annotations_annot_dict(self): } self.assertDictEqual(obs, exp) - def test_genome_data_dirfmt_samples_genome_dict(self): - genes = GenomeDataDirectoryFormat( + def test_genes_dirfmt_samples_file_dict(self): + genes = GenesDirectoryFormat( self.get_data_path('genes_samples'), mode='r') - obs = genes.genome_dict() + obs = genes.file_dict() exp = { 'sample1': { - 'genes1': str(Path(genes.path / 'sample1/genes1.fa')), + 'genes1': str(genes.path / 'sample1/genes1.fa'), }, 'sample2': { - 'genes2': str(Path(genes.path / 'sample2/genes2.fa')), + 'genes2': str(genes.path / 'sample2/genes2.fa'), }, } self.assertDictEqual(obs, exp) @@ -206,15 +206,15 @@ def test_genome_data_dirfmt_samples_genome_dict(self): } self.assertDictEqual(obs, exp) - def test_genes_dirfmt_genome_dict(self): + def test_genes_dirfmt_file_dict(self): genes = ( - GenomeDataDirectoryFormat(self.get_data_path('genes'), mode='r') + GenesDirectoryFormat(self.get_data_path('genes'), mode='r') ) obs = genes.genome_dict() exp = { - 'genes1': str(Path(genes.path / 'genes1.fa')), - 'genes2': str(Path(genes.path / 'genes2.fa')) + 'genes1': str(genes.path / 'genes1.fa'), + 'genes2': str(genes.path / 'genes2.fa') } self.assertDictEqual(obs, exp) From 1c8c25ba647fd24e4894c2d927f90685391ed73f Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 15:31:55 +0200 Subject: [PATCH 14/22] removed tests for genomedatadirfmt file dict --- q2_types/genome_data/tests/test_formats.py | 47 ---------------------- 1 file changed, 47 deletions(-) diff --git a/q2_types/genome_data/tests/test_formats.py b/q2_types/genome_data/tests/test_formats.py index 9f37403..a17685d 100644 --- a/q2_types/genome_data/tests/test_formats.py +++ b/q2_types/genome_data/tests/test_formats.py @@ -6,7 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- import unittest -from pathlib import Path from qiime2.core.exceptions import ValidationError from qiime2.plugin.testing import TestPluginBase @@ -15,7 +14,6 @@ GenesDirectoryFormat, ProteinsDirectoryFormat, GFF3Format, LociDirectoryFormat, SeedOrthologDirFmt, OrthologFileFmt, OrthologAnnotationDirFmt, GenomeSequencesDirectoryFormat, - GenomeDataDirectoryFormat ) @@ -180,51 +178,6 @@ def test_ortholog_annotations_annot_dict(self): } self.assertDictEqual(obs, exp) - def test_genes_dirfmt_samples_file_dict(self): - genes = GenesDirectoryFormat( - self.get_data_path('genes_samples'), mode='r') - - obs = genes.file_dict() - exp = { - 'sample1': { - 'genes1': str(genes.path / 'sample1/genes1.fa'), - }, - 'sample2': { - 'genes2': str(genes.path / 'sample2/genes2.fa'), - }, - } - self.assertDictEqual(obs, exp) - - obs = genes.genome_dict(relative=True) - exp = { - 'sample1': { - 'genes1': 'sample1/genes1.fa', - }, - 'sample2': { - 'genes2': 'sample2/genes2.fa', - }, - } - self.assertDictEqual(obs, exp) - - def test_genes_dirfmt_file_dict(self): - genes = ( - GenesDirectoryFormat(self.get_data_path('genes'), mode='r') - ) - - obs = genes.genome_dict() - exp = { - 'genes1': str(genes.path / 'genes1.fa'), - 'genes2': str(genes.path / 'genes2.fa') - } - self.assertDictEqual(obs, exp) - - obs = genes.genome_dict(relative=True) - exp = { - 'genes1': 'genes1.fa', - 'genes2': 'genes2.fa' - } - self.assertDictEqual(obs, exp) - if __name__ == '__main__': unittest.main() From 3c282919f623b9b72c57cd6d8d6587d59d647bf7 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 15:33:46 +0200 Subject: [PATCH 15/22] removed filemixin from mags --- q2_types/per_sample_sequences/_formats.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/q2_types/per_sample_sequences/_formats.py b/q2_types/per_sample_sequences/_formats.py index 4973821..d762f1f 100644 --- a/q2_types/per_sample_sequences/_formats.py +++ b/q2_types/per_sample_sequences/_formats.py @@ -536,8 +536,7 @@ def _validate_(self, level): class MultiFASTADirectoryFormat(MultiDirValidationMixin, - model.DirectoryFormat, - FileDictMixin): + model.DirectoryFormat): pathspec = r'.+\.(fa|fasta)$' sequences = model.FileCollection(pathspec, format=DNAFASTAFormat) From e77d66f36328ea32e84bd43d0b3ca6a43f151df3 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Thu, 17 Oct 2024 15:58:01 +0200 Subject: [PATCH 16/22] lint --- q2_types/genome_data/_formats.py | 1 - q2_types/per_sample_sequences/_formats.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/q2_types/genome_data/_formats.py b/q2_types/genome_data/_formats.py index 456ebe6..868a9a2 100644 --- a/q2_types/genome_data/_formats.py +++ b/q2_types/genome_data/_formats.py @@ -6,7 +6,6 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- import re -from collections import defaultdict import qiime2.plugin.model as model from qiime2.plugin import ValidationError diff --git a/q2_types/per_sample_sequences/_formats.py b/q2_types/per_sample_sequences/_formats.py index d762f1f..17bb3a4 100644 --- a/q2_types/per_sample_sequences/_formats.py +++ b/q2_types/per_sample_sequences/_formats.py @@ -25,7 +25,7 @@ from q2_types.bowtie2 import Bowtie2IndexDirFmt from q2_types.feature_data import DNAFASTAFormat from ._util import _parse_sequence_filename, _manifest_to_df -from .._util import FastqGzFormat, FileDictMixin +from .._util import FastqGzFormat class FastqAbsolutePathManifestFormatV2(model.TextFileFormat): From d6d80ee724a30705ea6663da7e5c05a78f17824f Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Wed, 5 Feb 2025 13:46:41 +0100 Subject: [PATCH 17/22] canges after review --- q2_types/_util.py | 23 ++++++++++++----------- q2_types/tests/test_util.py | 6 ++++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index f724c07..9af74b8 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -143,34 +143,33 @@ def _validate_mag_ids( class FileDictMixin: - def file_dict(self, relative=False, suffixes=None): + def file_dict(self, relative=False): """ For per sample directories it returns a mapping of sample id to another dictionary where keys represent the file name and values correspond to the filepath for each file matching the pathspec. For files, it returns a mapping of file name to filepath for each - file matching the pathspec. The specified suffixes are removed - from filenames. + file matching the pathspec. If the dir format has the attribute + 'suffixes', then these are removed from filenames. Parameters --------- relative : bool Whether to return filepaths relative to the directory's location. Returns absolute filepaths by default. - suffixes : List - A list of suffixes that should be removed from the filenames to - generate the ID. Returns ------- dict - Mapping of filename -> filepath as described above. + Mapping of sample id -> filepath as described above. Or mapping of sample id -> dict {filename: filepath} as described above. Both levels of the dictionary are sorted alphabetically by key. """ + suffixes = getattr(self, "suffixes", []) file_pattern = re.compile(self.pathspec) ids = defaultdict(dict) + for entry in self.path.iterdir(): if entry.is_dir(): outer_id = entry.name @@ -184,7 +183,7 @@ def file_dict(self, relative=False, suffixes=None): suffixes=suffixes, ) - ids[outer_id][inner_id] = str(file_path) + ids[outer_id][inner_id] = file_path ids[outer_id] = dict(sorted(ids[outer_id].items())) else: if file_pattern.match(entry.name): @@ -196,7 +195,7 @@ def file_dict(self, relative=False, suffixes=None): suffixes=suffixes, ) - ids[inner_id] = str(file_path) + ids[inner_id] = file_path return dict(sorted(ids.items())) @@ -217,17 +216,19 @@ def _process_path(path, relative, dir_format, suffixes): to the directory formats path or absolute. dir_format : model.DirectoryFormat. Any object of class model.DirectoryFormat. + suffixes : List + A list of suffixes that should be removed from the filenames to + generate the ID. Returns: ------- processed_path : str - The full relative or absolut path to the file. + The full relative or absolute path to the file. _id : str The ID derived from the file name. ID will be "" if the filename consists only of the suffix. """ file_name = path.stem - _id = file_name if suffixes: diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 110a47e..6e75077 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -67,9 +67,10 @@ def setUp(self): self.TestClass.pathspec = r'.+\.(txt|tsv)$' def test_file_dict_mixin(self): + self.TestClass.suffixes = ["_suffix"] fmt = self.TestClass(self.get_data_path("per_sample"), mode='r') - obs = fmt.file_dict(suffixes=["_suffix"]) + obs = fmt.file_dict() exp = { "sample1": { "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"), @@ -92,9 +93,10 @@ def test_file_dict_mixin(self): self.assertDictEqual(obs, exp) def test_genes_dirfmt_genome_dict(self): + self.TestClass.suffixes = ["_suffix1", "_suffix2"] fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r') - obs = fmt.file_dict(suffixes=["_suffix1", "_suffix2"]) + obs = fmt.file_dict() exp = { "id1": os.path.join(str(fmt), "id1_suffix1.txt"), "id2": os.path.join(str(fmt), "id2_suffix2.txt"), From b567c5a4a43947488a3761bbc6242d2e366db110 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 7 Feb 2025 15:13:47 +0100 Subject: [PATCH 18/22] added suffixes attribute to the kraken formats --- q2_types/kraken2/_formats.py | 2 ++ q2_types/kraken2/tests/test_formats.py | 1 + q2_types/tests/test_util.py | 7 ++----- setup.py | 0 4 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 setup.py diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py index f406ac4..2c0d0e0 100644 --- a/q2_types/kraken2/_formats.py +++ b/q2_types/kraken2/_formats.py @@ -70,6 +70,7 @@ def _validate_(self, level): class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin): pathspec = r'.+report\.(txt|tsv)$' + suffixes = ['.report'] reports = model.FileCollection(pathspec, format=Kraken2ReportFormat) @reports.set_path_maker @@ -148,6 +149,7 @@ def _validate_(self, level): class Kraken2OutputDirectoryFormat(model.DirectoryFormat, FileDictMixin): pathspec = r'.+output\.(txt|tsv)$' + suffixes = ['.output'] reports = model.FileCollection(pathspec, format=Kraken2OutputFormat) @reports.set_path_maker diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py index 038eede..adce9d0 100644 --- a/q2_types/kraken2/tests/test_formats.py +++ b/q2_types/kraken2/tests/test_formats.py @@ -85,6 +85,7 @@ def test_db_report_format_wrong_types(self): def test_report_dirfmt_from_reads(self): dirpath = self.get_data_path('reports-reads') fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') + a = fmt.file_dict() fmt.validate() def test_report_dirfmt_from_mags(self): diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 6e75077..5dae298 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -81,7 +81,7 @@ def test_file_dict_mixin(self): } self.assertDictEqual(obs, exp) - obs = fmt.file_dict(suffixes=["_suffix"], relative=True) + obs = fmt.file_dict(relative=True) exp = { "sample1": { "id1": "sample1/id1_suffix.txt", @@ -103,10 +103,7 @@ def test_genes_dirfmt_genome_dict(self): } self.assertDictEqual(obs, exp) - obs = fmt.file_dict( - suffixes=["_suffix1", "_suffix2"], - relative=True - ) + obs = fmt.file_dict(relative=True) exp = { "id1": "id1_suffix1.txt", "id2": "id2_suffix2.txt", diff --git a/setup.py b/setup.py deleted file mode 100644 index e69de29..0000000 From 123faa7b3a04c4a25be1083395d0d3f92b8aa26b Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 7 Feb 2025 15:30:09 +0100 Subject: [PATCH 19/22] added test for filedictmixin with krakenoutput format --- .../sample1/bin1.output.txt | 32 +++++++++++++++++ .../sample1/bin2.output.txt | 32 +++++++++++++++++ .../sample2/bin3.output.txt | 32 +++++++++++++++++ q2_types/tests/test_util.py | 34 +++++++++++++++++-- 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100755 q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt create mode 100755 q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt create mode 100755 q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt diff --git a/q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt b/q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt new file mode 100755 index 0000000..21a5992 --- /dev/null +++ b/q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt @@ -0,0 +1,32 @@ +C k119_33069 1912795 10855 1912795:Q +C k119_55515 1583098 5698 1583098:Q +C k119_66468 1323375 5173 1323375:Q +C k119_33506 182217 17101 182217:Q +C k119_22814 1472 19997 1472:Q +C k119_23274 29388 23523 29388:Q +C k119_45180 545501 25821 545501:Q +C k119_34380 1218 4423 1218:Q +C k119_1654 2518177 31450 2518177:Q +C k119_45407 221027 2908 221027:Q +C k119_12788 59919 2856 59919:Q +U k119_34900 0 3045 0:Q +C k119_45855 851 19053 851:Q +C k119_90411 2647897 2589 2647897:Q +C k119_57806 2653681 4515 2653681:Q +C k119_58481 131567 19174 131567:Q +C k119_47669 2682541 11848 2682541:Q +C k119_59208 1977865 3665 1977865:Q +C k119_16398 2770780 5030 2770780:Q +C k119_60835 400634 2807 400634:Q +C k119_49584 2490633 6493 2490633:Q +C k119_28869 111780 8356 111780:Q +C k119_94747 2305987 3774 2305987:Q +C k119_40414 983544 27806 983544:Q +C k119_73618 2563896 3473 2563896:Q +C k119_84540 332101 3409 332101:Q +C k119_73768 2593542 29942 2593542:Q +C k119_41848 34105 8793 34105:Q +C k119_43035 1301 4680 1301:Q +C k119_65066 1547445 10430 1547445:Q +C k119_10361 491950 68731 491950:Q +C k119_10711 52959 8685 52959:Q diff --git a/q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt b/q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt new file mode 100755 index 0000000..21a5992 --- /dev/null +++ b/q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt @@ -0,0 +1,32 @@ +C k119_33069 1912795 10855 1912795:Q +C k119_55515 1583098 5698 1583098:Q +C k119_66468 1323375 5173 1323375:Q +C k119_33506 182217 17101 182217:Q +C k119_22814 1472 19997 1472:Q +C k119_23274 29388 23523 29388:Q +C k119_45180 545501 25821 545501:Q +C k119_34380 1218 4423 1218:Q +C k119_1654 2518177 31450 2518177:Q +C k119_45407 221027 2908 221027:Q +C k119_12788 59919 2856 59919:Q +U k119_34900 0 3045 0:Q +C k119_45855 851 19053 851:Q +C k119_90411 2647897 2589 2647897:Q +C k119_57806 2653681 4515 2653681:Q +C k119_58481 131567 19174 131567:Q +C k119_47669 2682541 11848 2682541:Q +C k119_59208 1977865 3665 1977865:Q +C k119_16398 2770780 5030 2770780:Q +C k119_60835 400634 2807 400634:Q +C k119_49584 2490633 6493 2490633:Q +C k119_28869 111780 8356 111780:Q +C k119_94747 2305987 3774 2305987:Q +C k119_40414 983544 27806 983544:Q +C k119_73618 2563896 3473 2563896:Q +C k119_84540 332101 3409 332101:Q +C k119_73768 2593542 29942 2593542:Q +C k119_41848 34105 8793 34105:Q +C k119_43035 1301 4680 1301:Q +C k119_65066 1547445 10430 1547445:Q +C k119_10361 491950 68731 491950:Q +C k119_10711 52959 8685 52959:Q diff --git a/q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt b/q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt new file mode 100755 index 0000000..21a5992 --- /dev/null +++ b/q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt @@ -0,0 +1,32 @@ +C k119_33069 1912795 10855 1912795:Q +C k119_55515 1583098 5698 1583098:Q +C k119_66468 1323375 5173 1323375:Q +C k119_33506 182217 17101 182217:Q +C k119_22814 1472 19997 1472:Q +C k119_23274 29388 23523 29388:Q +C k119_45180 545501 25821 545501:Q +C k119_34380 1218 4423 1218:Q +C k119_1654 2518177 31450 2518177:Q +C k119_45407 221027 2908 221027:Q +C k119_12788 59919 2856 59919:Q +U k119_34900 0 3045 0:Q +C k119_45855 851 19053 851:Q +C k119_90411 2647897 2589 2647897:Q +C k119_57806 2653681 4515 2653681:Q +C k119_58481 131567 19174 131567:Q +C k119_47669 2682541 11848 2682541:Q +C k119_59208 1977865 3665 1977865:Q +C k119_16398 2770780 5030 2770780:Q +C k119_60835 400634 2807 400634:Q +C k119_49584 2490633 6493 2490633:Q +C k119_28869 111780 8356 111780:Q +C k119_94747 2305987 3774 2305987:Q +C k119_40414 983544 27806 983544:Q +C k119_73618 2563896 3473 2563896:Q +C k119_84540 332101 3409 332101:Q +C k119_73768 2593542 29942 2593542:Q +C k119_41848 34105 8793 34105:Q +C k119_43035 1301 4680 1301:Q +C k119_65066 1547445 10430 1547445:Q +C k119_10361 491950 68731 491950:Q +C k119_10711 52959 8685 52959:Q diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 5dae298..0b15eee 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -8,6 +8,7 @@ import os from pathlib import Path +from q2_types.kraken2 import Kraken2OutputDirectoryFormat from qiime2.plugin import model from qiime2.plugin.testing import TestPluginBase @@ -66,7 +67,7 @@ def setUp(self): ) self.TestClass.pathspec = r'.+\.(txt|tsv)$' - def test_file_dict_mixin(self): + def test_file_dict_mixin_per_sample(self): self.TestClass.suffixes = ["_suffix"] fmt = self.TestClass(self.get_data_path("per_sample"), mode='r') @@ -92,7 +93,7 @@ def test_file_dict_mixin(self): } self.assertDictEqual(obs, exp) - def test_genes_dirfmt_genome_dict(self): + def test_file_dict_mixin_per_sample_not_per_sample(self): self.TestClass.suffixes = ["_suffix1", "_suffix2"] fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r') @@ -110,6 +111,35 @@ def test_genes_dirfmt_genome_dict(self): } self.assertDictEqual(obs, exp) + def test_file_dict_mixin_kraken_outputs(self): + fmt = Kraken2OutputDirectoryFormat( + self.get_data_path("kraken-outputs-mags"), mode='r' + ) + + obs = fmt.file_dict() + exp = { + "sample1": { + "bin1": os.path.join(str(fmt), "sample1", "bin1.output.txt"), + "bin2": os.path.join(str(fmt), "sample1", "bin2.output.txt"), + }, + "sample2": { + "bin3": os.path.join(str(fmt), "sample2", "bin3.output.txt"), + }, + } + self.assertDictEqual(obs, exp) + + obs = fmt.file_dict(relative=True) + exp = { + "sample1": { + "bin1": "sample1/bin1.output.txt", + "bin2": "sample1/bin2.output.txt", + }, + "sample2": { + "bin3": "sample2/bin3.output.txt", + }, + } + self.assertDictEqual(obs, exp) + class TestProcessPath(TestPluginBase): package = "q2_types.tests" From ec6ac6fcef77439c2770743ec49c2bd2c559bf70 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Fri, 7 Feb 2025 15:32:41 +0100 Subject: [PATCH 20/22] removed debug print --- q2_types/kraken2/tests/test_formats.py | 1 - 1 file changed, 1 deletion(-) diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py index adce9d0..038eede 100644 --- a/q2_types/kraken2/tests/test_formats.py +++ b/q2_types/kraken2/tests/test_formats.py @@ -85,7 +85,6 @@ def test_db_report_format_wrong_types(self): def test_report_dirfmt_from_reads(self): dirpath = self.get_data_path('reports-reads') fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r') - a = fmt.file_dict() fmt.validate() def test_report_dirfmt_from_mags(self): From 8fb5a69e79efdb6810c9cec53840dcd5d9307409 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 11 Feb 2025 10:50:21 +0100 Subject: [PATCH 21/22] added process_path to the filedictmixin and changed the tests --- q2_types/_util.py | 94 +++++++++---------- .../{id1_suffix.txt => id1_suffix1.txt} | 0 .../{id2_suffix.txt => id2_suffix1.txt} | 0 q2_types/tests/test_util.py | 57 ++++------- 4 files changed, 64 insertions(+), 87 deletions(-) rename q2_types/tests/data/per_sample/sample1/{id1_suffix.txt => id1_suffix1.txt} (100%) rename q2_types/tests/data/per_sample/sample2/{id2_suffix.txt => id2_suffix1.txt} (100%) diff --git a/q2_types/_util.py b/q2_types/_util.py index 9af74b8..8cd80a7 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -166,7 +166,6 @@ def file_dict(self, relative=False): described above. Both levels of the dictionary are sorted alphabetically by key. """ - suffixes = getattr(self, "suffixes", []) file_pattern = re.compile(self.pathspec) ids = defaultdict(dict) @@ -176,11 +175,9 @@ def file_dict(self, relative=False): for path in entry.iterdir(): if file_pattern.match(path.name): - file_path, inner_id = _process_path( + file_path, inner_id = self._process_path( path=path, relative=relative, - dir_format=self, - suffixes=suffixes, ) ids[outer_id][inner_id] = file_path @@ -188,11 +185,9 @@ def file_dict(self, relative=False): else: if file_pattern.match(entry.name): - file_path, inner_id = _process_path( + file_path, inner_id = self._process_path( path=entry, relative=relative, - dir_format=self, - suffixes=suffixes, ) ids[inner_id] = file_path @@ -200,46 +195,45 @@ def file_dict(self, relative=False): return dict(sorted(ids.items())) -def _process_path(path, relative, dir_format, suffixes): - """ - This function processes the input file path to generate an absolute or - relative path string and the ID derived from the file name. The ID is - extracted by removing the one of the specified suffixes from the file - name. If no suffixes are specified the ID is defined to be the filename. - - Parameters: - --------- - path : Path - A Path object representing the file path to process. - relative : bool - A flag indicating whether the returned path should be relative - to the directory formats path or absolute. - dir_format : model.DirectoryFormat. - Any object of class model.DirectoryFormat. - suffixes : List - A list of suffixes that should be removed from the filenames to - generate the ID. - - Returns: - ------- - processed_path : str - The full relative or absolute path to the file. - _id : str - The ID derived from the file name. ID will be "" if the filename - consists only of the suffix. - """ - file_name = path.stem - _id = file_name - - if suffixes: - for suffix in suffixes: - if file_name.endswith(suffix): - _id = file_name[:-len(suffix)] - break - - processed_path = ( - path.absolute().relative_to(dir_format.path.absolute()) - if relative - else path.absolute() - ) - return str(processed_path), _id + def _process_path(self, path, relative=False): + """ + This function processes the input file path to generate an absolute or + relative path string and the ID derived from the file name. The ID is + extracted by removing the one of the specified suffixes from the file + name. If the class does not have a suffixes attribute, then the ID is + defined to be the filename. + + Parameters: + --------- + path : Path + A Path object representing the file path to process. + relative : bool + A flag indicating whether the returned path should be relative + to the directory formats path or absolute. + dir_format : model.DirectoryFormat. + Any object of class model.DirectoryFormat. + + Returns: + ------- + processed_path : str + The full relative or absolute path to the file. + _id : str + The ID derived from the file name. ID will be "" if the filename + consists only of the suffix. + """ + file_name = path.stem + _id = file_name + suffixes = getattr(self, "suffixes", []) + + if suffixes: + for suffix in suffixes: + if file_name.endswith(suffix): + _id = file_name[:-len(suffix)] + break + + processed_path = ( + path.absolute().relative_to(self.path.absolute()) + if relative + else path.absolute() + ) + return str(processed_path), _id diff --git a/q2_types/tests/data/per_sample/sample1/id1_suffix.txt b/q2_types/tests/data/per_sample/sample1/id1_suffix1.txt similarity index 100% rename from q2_types/tests/data/per_sample/sample1/id1_suffix.txt rename to q2_types/tests/data/per_sample/sample1/id1_suffix1.txt diff --git a/q2_types/tests/data/per_sample/sample2/id2_suffix.txt b/q2_types/tests/data/per_sample/sample2/id2_suffix1.txt similarity index 100% rename from q2_types/tests/data/per_sample/sample2/id2_suffix.txt rename to q2_types/tests/data/per_sample/sample2/id2_suffix1.txt diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 0b15eee..5e0ff7d 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -13,7 +13,7 @@ from qiime2.plugin.testing import TestPluginBase from q2_types._util import _validate_num_partitions, _validate_mag_ids, \ - FileDictMixin, _process_path + FileDictMixin class TestUtil(TestPluginBase): @@ -54,7 +54,7 @@ def test_validate_mag_ids_invalid(self): ) -class TestFileDictMixing(TestPluginBase): +class TestFileDictMixin(TestPluginBase): package = "q2_types.tests" def setUp(self): @@ -66,18 +66,18 @@ def setUp(self): {} ) self.TestClass.pathspec = r'.+\.(txt|tsv)$' + self.TestClass.suffixes = ["_suffix1", "_suffix2"] def test_file_dict_mixin_per_sample(self): - self.TestClass.suffixes = ["_suffix"] fmt = self.TestClass(self.get_data_path("per_sample"), mode='r') obs = fmt.file_dict() exp = { "sample1": { - "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"), + "id1": os.path.join(str(fmt), "sample1", "id1_suffix1.txt"), }, "sample2": { - "id2": os.path.join(str(fmt), "sample2", "id2_suffix.txt"), + "id2": os.path.join(str(fmt), "sample2", "id2_suffix1.txt"), }, } self.assertDictEqual(obs, exp) @@ -85,16 +85,15 @@ def test_file_dict_mixin_per_sample(self): obs = fmt.file_dict(relative=True) exp = { "sample1": { - "id1": "sample1/id1_suffix.txt", + "id1": "sample1/id1_suffix1.txt", }, "sample2": { - "id2": "sample2/id2_suffix.txt", + "id2": "sample2/id2_suffix1.txt", }, } self.assertDictEqual(obs, exp) - def test_file_dict_mixin_per_sample_not_per_sample(self): - self.TestClass.suffixes = ["_suffix1", "_suffix2"] + def test_file_dict_mixin_not_per_sample(self): fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r') obs = fmt.file_dict() @@ -141,54 +140,38 @@ def test_file_dict_mixin_kraken_outputs(self): self.assertDictEqual(obs, exp) -class TestProcessPath(TestPluginBase): - package = "q2_types.tests" - - def setUp(self): - super().setUp() - self.dir_fmt = model.DirectoryFormat() - def test_process_path_with_suffix(self): - # Test when the file name ends with a given suffix - path = Path(self.dir_fmt.path / "sample_id_suffix1.txt") - suffixes = ["_suffix1", "_suffix2"] + # Test when class does have suffixes attribute + test_class = self.TestClass() + path = Path(test_class.path / "sample_id_suffix1.txt") - result_path, result_id = _process_path( + result_path, result_id = test_class._process_path( path, relative=True, - dir_format=self.dir_fmt, - suffixes=suffixes ) self.assertEqual(result_id, "sample_id") self.assertEqual(result_path, "sample_id_suffix1.txt") def test_process_path_without_suffix(self): - # Test when no suffix matches the file name - path = Path(self.dir_fmt.path / "sample_id.txt") - suffixes = ["_suffix1", "_suffix2"] + # Test when class does not have suffixes attribute + test_class = self.TestClass() + delattr(self.TestClass, "suffixes") + path = Path(test_class.path / "sample_id.txt") - result_path, result_id = _process_path( + result_path, result_id = test_class._process_path( path, relative=True, - dir_format=self.dir_fmt, - suffixes=suffixes ) self.assertEqual(result_id, "sample_id") self.assertEqual(result_path, "sample_id.txt") def test_process_path_absolute(self): - # Test when the relative flag is False (absolute path is returned) - path = Path(self.dir_fmt.path / "sample_id_suffix2.txt") - suffixes = ["_suffix1", "_suffix2"] + test_class = self.TestClass() + path = Path(test_class.path / "sample_id_suffix1.txt") - result_path, result_id = _process_path( - path, - relative=False, - dir_format=self.dir_fmt, - suffixes=suffixes - ) + result_path, result_id = test_class._process_path(path) self.assertEqual(result_id, "sample_id") self.assertEqual(result_path, str(path.absolute())) From ae09d8f46e7213e1240c5f053d9d34fb2e776ab4 Mon Sep 17 00:00:00 2001 From: VinzentRisch Date: Tue, 11 Feb 2025 11:05:05 +0100 Subject: [PATCH 22/22] lint --- q2_types/_util.py | 14 +++++--------- q2_types/tests/test_util.py | 1 - 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/q2_types/_util.py b/q2_types/_util.py index 8cd80a7..4df29eb 100644 --- a/q2_types/_util.py +++ b/q2_types/_util.py @@ -194,14 +194,13 @@ def file_dict(self, relative=False): return dict(sorted(ids.items())) - def _process_path(self, path, relative=False): """ This function processes the input file path to generate an absolute or relative path string and the ID derived from the file name. The ID is - extracted by removing the one of the specified suffixes from the file - name. If the class does not have a suffixes attribute, then the ID is - defined to be the filename. + extracted by removing one of the suffixes from the file name. If the + class does not have a suffixes attribute, then the ID is defined to + be the filename. Parameters: --------- @@ -210,16 +209,13 @@ def _process_path(self, path, relative=False): relative : bool A flag indicating whether the returned path should be relative to the directory formats path or absolute. - dir_format : model.DirectoryFormat. - Any object of class model.DirectoryFormat. Returns: ------- processed_path : str - The full relative or absolute path to the file. + The relative or absolute path to the file. _id : str - The ID derived from the file name. ID will be "" if the filename - consists only of the suffix. + The ID derived from the file name. """ file_name = path.stem _id = file_name diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py index 5e0ff7d..0c495ea 100644 --- a/q2_types/tests/test_util.py +++ b/q2_types/tests/test_util.py @@ -139,7 +139,6 @@ def test_file_dict_mixin_kraken_outputs(self): } self.assertDictEqual(obs, exp) - def test_process_path_with_suffix(self): # Test when class does have suffixes attribute test_class = self.TestClass()