From b4b740a0e5e6ecd3022498896dab44e45cf306c2 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 15 Oct 2024 10:00:16 +0200
Subject: [PATCH 01/22] added file_dict mixin

---
 q2_types/kraken2/_formats.py           | 100 ++++++++++++++++++++++++-
 q2_types/kraken2/tests/test_formats.py |   6 ++
 2 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py
index 380bbd3..d634314 100644
--- a/q2_types/kraken2/_formats.py
+++ b/q2_types/kraken2/_formats.py
@@ -5,6 +5,7 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
+from collections import defaultdict
 
 import pandas as pd
 from pandas.core.dtypes.common import is_string_dtype
@@ -67,7 +68,104 @@ def _validate_(self, level):
             )
 
 
-class Kraken2ReportDirectoryFormat(model.DirectoryFormat):
+class FileDictMixin:
+    def file_dict(self, relative=False, suffixes=None):
+        """
+        For per sample directories it returns a mapping of sample id to
+        another dictionary where keys represent the file name and values
+        correspond to the filepath for each file.
+        For files, it returns a mapping of file name to filepath for each
+        file. The specified suffixes are removed from filenames.
+
+        Parameters
+        ---------
+        relative : bool
+            Whether to return filepaths relative to the directory's location.
+            Returns absolute filepaths by default.
+        suffixes : List
+            A list of suffixes that should be removed from the filenames to
+            generate the ID.
+
+        Returns
+        -------
+        dict
+            Mapping of filename -> filepath as described above.
+            Or mapping of sample id -> dict {filename: filepath} as
+            described above.
+            Both levels of the dictionary are sorted alphabetically by key.
+        """
+        ids = defaultdict(dict)
+        for entry in self.path.iterdir():
+            if entry.is_dir():
+                outer_id = entry.name
+                for path in entry.iterdir():
+                    file_path, inner_id = _create_path(
+                        path=path,
+                        relative=relative,
+                        dir_format=self,
+                        suffixes=suffixes
+                    )
+
+                    ids[outer_id][inner_id] = str(file_path)
+                ids[outer_id] = dict(sorted(ids[outer_id].items()))
+            else:
+                file_path, inner_id = _create_path(
+                    path=entry,
+                    relative=relative,
+                    dir_format=self,
+                    suffixes=suffixes
+
+                )
+
+                ids[inner_id] = str(file_path)
+
+        return dict(sorted(ids.items()))
+
+
+def _create_path(path, relative, dir_format, suffixes):
+    """
+    This function processes the input file path to generate an absolute or
+    relative path string and the ID derived from the file name. The ID is
+    extracted by removing the one of the specified suffixes  from the file
+    name. If no suffixes are specified the ID is defined to be the filename.
+
+    Parameters:
+    ---------
+        path : Path
+            A Path object representing the file path to process.
+        relative : bool
+            A flag indicating whether the returned path should be relative
+            to the directory formats path or absolute.
+        dir_format : DirectoryFormat.
+            Any object of class DirectoryFormat.
+
+    Returns:
+    -------
+        path_dict : str
+            The full relative or absolut path to the file.
+        _id : str
+            The ID derived from the file name. ID will be "" if the filename
+            consists only of the suffix.
+    """
+    file_name = path.stem
+
+    _id = file_name
+
+    if suffixes:
+        for suffix in suffixes:
+            if file_name.endswith(suffix[1:]):
+                _id = file_name[:-len(suffix)]
+                break
+
+    path_dict = (
+        path.absolute().relative_to(dir_format.path.absolute())
+        if relative
+        else path.absolute()
+    )
+    return str(path_dict), _id
+
+
+class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin):
     reports = model.FileCollection(
         r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat
     )
diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py
index 038eede..4b0157b 100644
--- a/q2_types/kraken2/tests/test_formats.py
+++ b/q2_types/kraken2/tests/test_formats.py
@@ -87,6 +87,12 @@ def test_report_dirfmt_from_reads(self):
         fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
         fmt.validate()
 
+    def test_report_dirfmt_from_reads_dict(self):
+        dirpath = self.get_data_path('reports-reads')
+        fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
+        dict = fmt.file_dict(suffixes=["_report"])
+        print(fmt)
+
     def test_report_dirfmt_from_mags(self):
         dirpath = self.get_data_path('reports-mags')
         fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')

From 03efc075af2fcc0629b9f35f01070dbacab311f5 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 15 Oct 2024 10:36:12 +0200
Subject: [PATCH 02/22] moved mixin to util

---
 q2_types/_util.py                      |  98 ++++++++++++++++++++++++
 q2_types/kraken2/_formats.py           | 101 +------------------------
 q2_types/kraken2/tests/test_formats.py |   6 --
 3 files changed, 100 insertions(+), 105 deletions(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index 9f633b7..0349450 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -8,6 +8,7 @@
 import gzip
 import itertools
 import warnings
+from collections import defaultdict
 from typing import List
 
 import skbio
@@ -138,3 +139,100 @@ def _validate_mag_ids(
                 "correctly. Printing duplicate MAG IDs: "
                 f"{set(duplicates)}"
             )
+
+
+class FileDictMixin:
+    def file_dict(self, relative=False, suffixes=None):
+        """
+        For per sample directories it returns a mapping of sample id to
+        another dictionary where keys represent the file name and values
+        correspond to the filepath for each file.
+        For files, it returns a mapping of file name to filepath for each
+        file. The specified suffixes are removed from filenames.
+
+        Parameters
+        ---------
+        relative : bool
+            Whether to return filepaths relative to the directory's location.
+            Returns absolute filepaths by default.
+        suffixes : List
+            A list of suffixes that should be removed from the filenames to
+            generate the ID.
+
+        Returns
+        -------
+        dict
+            Mapping of filename -> filepath as described above.
+            Or mapping of sample id -> dict {filename: filepath} as
+            described above.
+            Both levels of the dictionary are sorted alphabetically by key.
+        """
+        ids = defaultdict(dict)
+        for entry in self.path.iterdir():
+            if entry.is_dir():
+                outer_id = entry.name
+                for path in entry.iterdir():
+                    file_path, inner_id = _create_path(
+                        path=path,
+                        relative=relative,
+                        dir_format=self,
+                        suffixes=suffixes
+                    )
+
+                    ids[outer_id][inner_id] = str(file_path)
+                ids[outer_id] = dict(sorted(ids[outer_id].items()))
+            else:
+                file_path, inner_id = _create_path(
+                    path=entry,
+                    relative=relative,
+                    dir_format=self,
+                    suffixes=suffixes
+
+                )
+
+                ids[inner_id] = str(file_path)
+
+        return dict(sorted(ids.items()))
+
+
+def _create_path(path, relative, dir_format, suffixes):
+    """
+    This function processes the input file path to generate an absolute or
+    relative path string and the ID derived from the file name. The ID is
+    extracted by removing the one of the specified suffixes  from the file
+    name. If no suffixes are specified the ID is defined to be the filename.
+
+    Parameters:
+    ---------
+        path : Path
+            A Path object representing the file path to process.
+        relative : bool
+            A flag indicating whether the returned path should be relative
+            to the directory formats path or absolute.
+        dir_format : DirectoryFormat.
+            Any object of class DirectoryFormat.
+
+    Returns:
+    -------
+        path_dict : str
+            The full relative or absolut path to the file.
+        _id : str
+            The ID derived from the file name. ID will be "" if the filename
+            consists only of the suffix.
+    """
+    file_name = path.stem
+
+    _id = file_name
+
+    if suffixes:
+        for suffix in suffixes:
+            if file_name.endswith(suffix[1:]):
+                _id = file_name[:-len(suffix)]
+                break
+
+    path_dict = (
+        path.absolute().relative_to(dir_format.path.absolute())
+        if relative
+        else path.absolute()
+    )
+    return str(path_dict), _id
\ No newline at end of file
diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py
index d634314..6d6ad74 100644
--- a/q2_types/kraken2/_formats.py
+++ b/q2_types/kraken2/_formats.py
@@ -5,12 +5,12 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
-from collections import defaultdict
-
 import pandas as pd
 from pandas.core.dtypes.common import is_string_dtype
 from qiime2.plugin import model, ValidationError
 
+from q2_types._util import FileDictMixin
+
 
 class Kraken2ReportFormat(model.TextFileFormat):
     MEASURE_COLUMNS = {
@@ -68,103 +68,6 @@ def _validate_(self, level):
             )
 
 
-class FileDictMixin:
-    def file_dict(self, relative=False, suffixes=None):
-        """
-        For per sample directories it returns a mapping of sample id to
-        another dictionary where keys represent the file name and values
-        correspond to the filepath for each file.
-        For files, it returns a mapping of file name to filepath for each
-        file. The specified suffixes are removed from filenames.
-
-        Parameters
-        ---------
-        relative : bool
-            Whether to return filepaths relative to the directory's location.
-            Returns absolute filepaths by default.
-        suffixes : List
-            A list of suffixes that should be removed from the filenames to
-            generate the ID.
-
-        Returns
-        -------
-        dict
-            Mapping of filename -> filepath as described above.
-            Or mapping of sample id -> dict {filename: filepath} as
-            described above.
-            Both levels of the dictionary are sorted alphabetically by key.
-        """
-        ids = defaultdict(dict)
-        for entry in self.path.iterdir():
-            if entry.is_dir():
-                outer_id = entry.name
-                for path in entry.iterdir():
-                    file_path, inner_id = _create_path(
-                        path=path,
-                        relative=relative,
-                        dir_format=self,
-                        suffixes=suffixes
-                    )
-
-                    ids[outer_id][inner_id] = str(file_path)
-                ids[outer_id] = dict(sorted(ids[outer_id].items()))
-            else:
-                file_path, inner_id = _create_path(
-                    path=entry,
-                    relative=relative,
-                    dir_format=self,
-                    suffixes=suffixes
-
-                )
-
-                ids[inner_id] = str(file_path)
-
-        return dict(sorted(ids.items()))
-
-
-def _create_path(path, relative, dir_format, suffixes):
-    """
-    This function processes the input file path to generate an absolute or
-    relative path string and the ID derived from the file name. The ID is
-    extracted by removing the one of the specified suffixes  from the file
-    name. If no suffixes are specified the ID is defined to be the filename.
-
-    Parameters:
-    ---------
-        path : Path
-            A Path object representing the file path to process.
-        relative : bool
-            A flag indicating whether the returned path should be relative
-            to the directory formats path or absolute.
-        dir_format : DirectoryFormat.
-            Any object of class DirectoryFormat.
-
-    Returns:
-    -------
-        path_dict : str
-            The full relative or absolut path to the file.
-        _id : str
-            The ID derived from the file name. ID will be "" if the filename
-            consists only of the suffix.
-    """
-    file_name = path.stem
-
-    _id = file_name
-
-    if suffixes:
-        for suffix in suffixes:
-            if file_name.endswith(suffix[1:]):
-                _id = file_name[:-len(suffix)]
-                break
-
-    path_dict = (
-        path.absolute().relative_to(dir_format.path.absolute())
-        if relative
-        else path.absolute()
-    )
-    return str(path_dict), _id
-
-
 class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin):
     reports = model.FileCollection(
         r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat
diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py
index 4b0157b..038eede 100644
--- a/q2_types/kraken2/tests/test_formats.py
+++ b/q2_types/kraken2/tests/test_formats.py
@@ -87,12 +87,6 @@ def test_report_dirfmt_from_reads(self):
         fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
         fmt.validate()
 
-    def test_report_dirfmt_from_reads_dict(self):
-        dirpath = self.get_data_path('reports-reads')
-        fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
-        dict = fmt.file_dict(suffixes=["_report"])
-        print(fmt)
-
     def test_report_dirfmt_from_mags(self):
         dirpath = self.get_data_path('reports-mags')
         fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')

From b8b270aafb0079f23dd9405d594e5c5ab3e344a7 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 15 Oct 2024 17:36:19 +0200
Subject: [PATCH 03/22] added tests for file_dict

---
 q2_types/_util.py                             |   6 +-
 .../tests/data/not_per_sample/id1_suffix1.txt |   0
 .../tests/data/not_per_sample/id2_suffix2.txt |   0
 .../data/per_sample/sample1/id1_suffix.txt    |   0
 .../tests/data/per_sample/sample2/suffix.txt  |   0
 q2_types/tests/test_util.py                   | 130 +++++++++++++++++-
 6 files changed, 132 insertions(+), 4 deletions(-)
 create mode 100644 q2_types/tests/data/not_per_sample/id1_suffix1.txt
 create mode 100644 q2_types/tests/data/not_per_sample/id2_suffix2.txt
 create mode 100644 q2_types/tests/data/per_sample/sample1/id1_suffix.txt
 create mode 100644 q2_types/tests/data/per_sample/sample2/suffix.txt

diff --git a/q2_types/_util.py b/q2_types/_util.py
index 0349450..9b8ddb7 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -172,7 +172,7 @@ def file_dict(self, relative=False, suffixes=None):
             if entry.is_dir():
                 outer_id = entry.name
                 for path in entry.iterdir():
-                    file_path, inner_id = _create_path(
+                    file_path, inner_id = _process_path(
                         path=path,
                         relative=relative,
                         dir_format=self,
@@ -182,7 +182,7 @@ def file_dict(self, relative=False, suffixes=None):
                     ids[outer_id][inner_id] = str(file_path)
                 ids[outer_id] = dict(sorted(ids[outer_id].items()))
             else:
-                file_path, inner_id = _create_path(
+                file_path, inner_id = _process_path(
                     path=entry,
                     relative=relative,
                     dir_format=self,
@@ -195,7 +195,7 @@ def file_dict(self, relative=False, suffixes=None):
         return dict(sorted(ids.items()))
 
 
-def _create_path(path, relative, dir_format, suffixes):
+def _process_path(path, relative, dir_format, suffixes):
     """
     This function processes the input file path to generate an absolute or
     relative path string and the ID derived from the file name. The ID is
diff --git a/q2_types/tests/data/not_per_sample/id1_suffix1.txt b/q2_types/tests/data/not_per_sample/id1_suffix1.txt
new file mode 100644
index 0000000..e69de29
diff --git a/q2_types/tests/data/not_per_sample/id2_suffix2.txt b/q2_types/tests/data/not_per_sample/id2_suffix2.txt
new file mode 100644
index 0000000..e69de29
diff --git a/q2_types/tests/data/per_sample/sample1/id1_suffix.txt b/q2_types/tests/data/per_sample/sample1/id1_suffix.txt
new file mode 100644
index 0000000..e69de29
diff --git a/q2_types/tests/data/per_sample/sample2/suffix.txt b/q2_types/tests/data/per_sample/sample2/suffix.txt
new file mode 100644
index 0000000..e69de29
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index edaa929..57b750b 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -5,9 +5,14 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
+import os
+from pathlib import Path
+
+from qiime2.plugin import model
 from qiime2.plugin.testing import TestPluginBase
 
-from q2_types._util import _validate_num_partitions, _validate_mag_ids
+from q2_types._util import _validate_num_partitions, _validate_mag_ids, \
+    FileDictMixin, _process_path
 
 
 class TestUtil(TestPluginBase):
@@ -46,3 +51,126 @@ def test_validate_mag_ids_invalid(self):
                 6,
                 [(0, "a"), (0, "a"), (0, "c"), (0, "d"), (0, "e"), (0, "f")]
             )
+
+    def test_file_dict_mixin(self):
+        TestClass = type(
+            f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}",
+            (FileDictMixin, model.DirectoryFormat),
+            {}
+        )
+        fmt = TestClass(self.get_data_path("per_sample"), mode='r')
+
+        obs = fmt.file_dict(suffixes=["_suffix"])
+        exp = {
+            "sample1": {
+                "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"),
+            },
+            "sample2": {
+                "": os.path.join(str(fmt), "sample2", "suffix.txt"),
+            },
+        }
+        self.assertDictEqual(obs, exp)
+
+        obs = fmt.file_dict(suffixes=["_suffix"], relative=True)
+        exp = {
+            "sample1": {
+                "id1": "sample1/id1_suffix.txt",
+            },
+            "sample2": {
+                "": "sample2/suffix.txt",
+            },
+        }
+        self.assertDictEqual(obs, exp)
+
+    def test_genes_dirfmt_genome_dict(self):
+        TestClass = type(
+            f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}",
+            (FileDictMixin, model.DirectoryFormat),
+            {}
+        )
+        fmt = TestClass(self.get_data_path("not_per_sample"), mode='r')
+
+        obs = fmt.file_dict(suffixes=["_suffix1", "_suffix2"])
+        exp = {
+            "id1": os.path.join(str(fmt), "id1_suffix1.txt"),
+            "id2": os.path.join(str(fmt), "id2_suffix2.txt"),
+        }
+        self.assertDictEqual(obs, exp)
+
+        obs = fmt.file_dict(
+            suffixes=["_suffix1", "_suffix2"],
+            relative=True
+        )
+        exp = {
+            "id1": "id1_suffix1.txt",
+            "id2": "id2_suffix2.txt",
+        }
+        self.assertDictEqual(obs, exp)
+
+
+class TestProcessPath(TestPluginBase):
+    package = "q2_types.tests"
+
+    def setUp(self):
+        super().setUp()
+        self.dir_fmt = model.DirectoryFormat()
+
+    def test_process_path_with_suffix(self):
+        # Test when the file name ends with a given suffix
+        path = Path(self.dir_fmt.path / "sample_id_suffix1.txt")
+        suffixes = ["_suffix1", "_suffix2"]
+
+        result_path, result_id = _process_path(
+            path,
+            relative=True,
+            dir_format=self.dir_fmt,
+            suffixes=suffixes
+        )
+
+        self.assertEqual(result_id, "sample_id")
+        self.assertEqual(result_path, "sample_id_suffix1.txt")
+
+    def test_process_path_without_suffix(self):
+        # Test when no suffix matches the file name
+        path = Path(self.dir_fmt.path / "sample_id.txt")
+        suffixes = ["_suffix1", "_suffix2"]
+
+        result_path, result_id = _process_path(
+            path,
+            relative=True,
+            dir_format=self.dir_fmt,
+            suffixes=suffixes
+        )
+
+        self.assertEqual(result_id, "sample_id")
+        self.assertEqual(result_path, "sample_id.txt")
+
+    def test_process_path_absolute(self):
+        # Test when the relative flag is False (absolute path is returned)
+        path = Path(self.dir_fmt.path / "sample_id_suffix2.txt")
+        suffixes = ["_suffix1", "_suffix2"]
+
+        result_path, result_id = _process_path(
+            path,
+            relative=False,
+            dir_format=self.dir_fmt,
+            suffixes=suffixes
+        )
+
+        self.assertEqual(result_id, "sample_id")
+        self.assertEqual(result_path, str(path.absolute()))
+
+    def test_process_path_only_suffix(self):
+        # Test when the file name consists only of the suffix
+        path = Path(self.dir_fmt.path / "suffix1.txt")
+        suffixes = ["_suffix1", "_suffix2"]
+
+        result_path, result_id = _process_path(
+            path,
+            relative=True,
+            dir_format=self.dir_fmt,
+            suffixes=suffixes
+        )
+
+        self.assertEqual(result_id, "")
+        self.assertEqual(result_path, "suffix1.txt")

From b17c931c5a9e3020b3724d9c285064e152dd6070 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 15 Oct 2024 17:37:53 +0200
Subject: [PATCH 04/22] added to package data

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a71fae3..7a50367 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
     },
     package_data={
         'q2_types': ['citations.bib'],
-        'q2_types.tests': ['data/*'],
+        'q2_types.tests': ['data/*', 'data/*/*', 'data/*/*/*'],
         'q2_types.bowtie2': ['citations.bib'],
         'q2_types.distance_matrix.tests': ['data/*'],
         'q2_types.feature_data.tests': ['data/*',

From 65fe610c9c256f5908df812265c33a2d71cc500e Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Wed, 16 Oct 2024 15:19:36 +0200
Subject: [PATCH 05/22] added separator without tests

---
 q2_types/_util.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index 9b8ddb7..c4e6918 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -142,7 +142,7 @@ def _validate_mag_ids(
 
 
 class FileDictMixin:
-    def file_dict(self, relative=False, suffixes=None):
+    def file_dict(self, relative=False, suffixes=None, separator=None):
         """
         For per sample directories it returns a mapping of sample id to
         another dictionary where keys represent the file name and values
@@ -176,7 +176,8 @@ def file_dict(self, relative=False, suffixes=None):
                         path=path,
                         relative=relative,
                         dir_format=self,
-                        suffixes=suffixes
+                        suffixes=suffixes,
+                        separator=separator
                     )
 
                     ids[outer_id][inner_id] = str(file_path)
@@ -186,8 +187,8 @@ def file_dict(self, relative=False, suffixes=None):
                     path=entry,
                     relative=relative,
                     dir_format=self,
-                    suffixes=suffixes
-
+                    suffixes=suffixes,
+                    separator=separator
                 )
 
                 ids[inner_id] = str(file_path)
@@ -195,7 +196,7 @@ def file_dict(self, relative=False, suffixes=None):
         return dict(sorted(ids.items()))
 
 
-def _process_path(path, relative, dir_format, suffixes):
+def _process_path(path, relative, dir_format, suffixes, separator):
     """
     This function processes the input file path to generate an absolute or
     relative path string and the ID derived from the file name. The ID is
@@ -226,8 +227,8 @@ def _process_path(path, relative, dir_format, suffixes):
 
     if suffixes:
         for suffix in suffixes:
-            if file_name.endswith(suffix[1:]):
-                _id = file_name[:-len(suffix)]
+            if file_name.endswith(suffix):
+                _id = file_name[:-len(suffix)+len(separator)]
                 break
 
     path_dict = (

From 688dd944f146406a657cf6b049be99db8dcaa378 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 10:55:58 +0200
Subject: [PATCH 06/22] fixed bug slicing

---
 q2_types/_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index c4e6918..e9cc678 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -228,7 +228,7 @@ def _process_path(path, relative, dir_format, suffixes, separator):
     if suffixes:
         for suffix in suffixes:
             if file_name.endswith(suffix):
-                _id = file_name[:-len(suffix)+len(separator)]
+                _id = file_name[:-(len(suffix)+len(separator))]
                 break
 
     path_dict = (

From a9cc86367c0ba65bca0c67f658e9076483a2ea15 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 11:18:11 +0200
Subject: [PATCH 07/22] removed separator

---
 q2_types/_util.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index e9cc678..8383d40 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -142,7 +142,7 @@ def _validate_mag_ids(
 
 
 class FileDictMixin:
-    def file_dict(self, relative=False, suffixes=None, separator=None):
+    def file_dict(self, relative=False, suffixes=None):
         """
         For per sample directories it returns a mapping of sample id to
         another dictionary where keys represent the file name and values
@@ -177,7 +177,6 @@ def file_dict(self, relative=False, suffixes=None, separator=None):
                         relative=relative,
                         dir_format=self,
                         suffixes=suffixes,
-                        separator=separator
                     )
 
                     ids[outer_id][inner_id] = str(file_path)
@@ -188,7 +187,6 @@ def file_dict(self, relative=False, suffixes=None, separator=None):
                     relative=relative,
                     dir_format=self,
                     suffixes=suffixes,
-                    separator=separator
                 )
 
                 ids[inner_id] = str(file_path)
@@ -196,7 +194,7 @@ def file_dict(self, relative=False, suffixes=None, separator=None):
         return dict(sorted(ids.items()))
 
 
-def _process_path(path, relative, dir_format, suffixes, separator):
+def _process_path(path, relative, dir_format, suffixes):
     """
     This function processes the input file path to generate an absolute or
     relative path string and the ID derived from the file name. The ID is
@@ -228,7 +226,7 @@ def _process_path(path, relative, dir_format, suffixes, separator):
     if suffixes:
         for suffix in suffixes:
             if file_name.endswith(suffix):
-                _id = file_name[:-(len(suffix)+len(separator))]
+                _id = file_name[:-len(suffix)]
                 break
 
     path_dict = (

From 7ad2f0ddc8178f0834b6dd207351ece118f132e7 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 13:14:05 +0200
Subject: [PATCH 08/22] removed option for id to be empty

---
 .../sample2/{suffix.txt => id2_suffix.txt}    |  0
 q2_types/tests/test_util.py                   | 23 +++++--------------
 2 files changed, 6 insertions(+), 17 deletions(-)
 rename q2_types/tests/data/per_sample/sample2/{suffix.txt => id2_suffix.txt} (100%)

diff --git a/q2_types/tests/data/per_sample/sample2/suffix.txt b/q2_types/tests/data/per_sample/sample2/id2_suffix.txt
similarity index 100%
rename from q2_types/tests/data/per_sample/sample2/suffix.txt
rename to q2_types/tests/data/per_sample/sample2/id2_suffix.txt
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 57b750b..7e473a3 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -52,6 +52,10 @@ def test_validate_mag_ids_invalid(self):
                 [(0, "a"), (0, "a"), (0, "c"), (0, "d"), (0, "e"), (0, "f")]
             )
 
+
+class TestFileDictMixing(TestPluginBase):
+    package = "q2_types.tests"
+
     def test_file_dict_mixin(self):
         TestClass = type(
             f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}",
@@ -66,7 +70,7 @@ def test_file_dict_mixin(self):
                 "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"),
             },
             "sample2": {
-                "": os.path.join(str(fmt), "sample2", "suffix.txt"),
+                "id2": os.path.join(str(fmt), "sample2", "id2_suffix.txt"),
             },
         }
         self.assertDictEqual(obs, exp)
@@ -77,7 +81,7 @@ def test_file_dict_mixin(self):
                 "id1": "sample1/id1_suffix.txt",
             },
             "sample2": {
-                "": "sample2/suffix.txt",
+                "id2": "sample2/id2_suffix.txt",
             },
         }
         self.assertDictEqual(obs, exp)
@@ -159,18 +163,3 @@ def test_process_path_absolute(self):
 
         self.assertEqual(result_id, "sample_id")
         self.assertEqual(result_path, str(path.absolute()))
-
-    def test_process_path_only_suffix(self):
-        # Test when the file name consists only of the suffix
-        path = Path(self.dir_fmt.path / "suffix1.txt")
-        suffixes = ["_suffix1", "_suffix2"]
-
-        result_path, result_id = _process_path(
-            path,
-            relative=True,
-            dir_format=self.dir_fmt,
-            suffixes=suffixes
-        )
-
-        self.assertEqual(result_id, "")
-        self.assertEqual(result_path, "suffix1.txt")

From 9b3d30fe59fb27d07c4491d23920ba109ab08d40 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 13:16:06 +0200
Subject: [PATCH 09/22] added mixin to outputs format

---
 q2_types/kraken2/_formats.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py
index 6d6ad74..38c1583 100644
--- a/q2_types/kraken2/_formats.py
+++ b/q2_types/kraken2/_formats.py
@@ -147,7 +147,7 @@ def _validate_(self, level):
             )
 
 
-class Kraken2OutputDirectoryFormat(model.DirectoryFormat):
+class Kraken2OutputDirectoryFormat(model.DirectoryFormat, FileDictMixin):
     reports = model.FileCollection(
         r'.+output\.(txt|tsv)$', format=Kraken2OutputFormat
     )

From e5547df33f7befd39f5f15bb364ad8bc63b60785 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 13:57:31 +0200
Subject: [PATCH 10/22] added pathspec check

---
 q2_types/_util.py                            | 47 +++++++++++---------
 q2_types/tests/data/not_per_sample/some_file |  0
 q2_types/tests/data/per_sample/some_file     |  0
 q2_types/tests/test_util.py                  | 18 ++++----
 4 files changed, 36 insertions(+), 29 deletions(-)
 create mode 100644 q2_types/tests/data/not_per_sample/some_file
 create mode 100644 q2_types/tests/data/per_sample/some_file

diff --git a/q2_types/_util.py b/q2_types/_util.py
index 8383d40..e05ff73 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -7,6 +7,7 @@
 # ----------------------------------------------------------------------------
 import gzip
 import itertools
+import re
 import warnings
 from collections import defaultdict
 from typing import List
@@ -146,9 +147,10 @@ def file_dict(self, relative=False, suffixes=None):
         """
         For per sample directories it returns a mapping of sample id to
         another dictionary where keys represent the file name and values
-        correspond to the filepath for each file.
+        correspond to the filepath for each file matching the pathspec.
         For files, it returns a mapping of file name to filepath for each
-        file. The specified suffixes are removed from filenames.
+        file matching the pathspec. The specified suffixes are removed
+        from filenames.
 
         Parameters
         ---------
@@ -167,29 +169,34 @@ def file_dict(self, relative=False, suffixes=None):
             described above.
             Both levels of the dictionary are sorted alphabetically by key.
         """
+        file_pattern = re.compile(self.pathspec)
         ids = defaultdict(dict)
         for entry in self.path.iterdir():
             if entry.is_dir():
                 outer_id = entry.name
                 for path in entry.iterdir():
+                    if file_pattern.match(path.name):
+
+                        file_path, inner_id = _process_path(
+                            path=path,
+                            relative=relative,
+                            dir_format=self,
+                            suffixes=suffixes,
+                        )
+
+                        ids[outer_id][inner_id] = str(file_path)
+                ids[outer_id] = dict(sorted(ids[outer_id].items()))
+            else:
+                if file_pattern.match(entry.name):
+
                     file_path, inner_id = _process_path(
-                        path=path,
+                        path=entry,
                         relative=relative,
                         dir_format=self,
                         suffixes=suffixes,
                     )
 
-                    ids[outer_id][inner_id] = str(file_path)
-                ids[outer_id] = dict(sorted(ids[outer_id].items()))
-            else:
-                file_path, inner_id = _process_path(
-                    path=entry,
-                    relative=relative,
-                    dir_format=self,
-                    suffixes=suffixes,
-                )
-
-                ids[inner_id] = str(file_path)
+                    ids[inner_id] = str(file_path)
 
         return dict(sorted(ids.items()))
 
@@ -198,7 +205,7 @@ def _process_path(path, relative, dir_format, suffixes):
     """
     This function processes the input file path to generate an absolute or
     relative path string and the ID derived from the file name. The ID is
-    extracted by removing the one of the specified suffixes  from the file
+    extracted by removing the one of the specified suffixes from the file
     name. If no suffixes are specified the ID is defined to be the filename.
 
     Parameters:
@@ -208,12 +215,12 @@ def _process_path(path, relative, dir_format, suffixes):
         relative : bool
             A flag indicating whether the returned path should be relative
             to the directory formats path or absolute.
-        dir_format : DirectoryFormat.
-            Any object of class DirectoryFormat.
+        dir_format : model.DirectoryFormat.
+            Any object of class model.DirectoryFormat.
 
     Returns:
     -------
-        path_dict : str
+        processed_path : str
             The full relative or absolut path to the file.
         _id : str
             The ID derived from the file name. ID will be "" if the filename
@@ -229,9 +236,9 @@ def _process_path(path, relative, dir_format, suffixes):
                 _id = file_name[:-len(suffix)]
                 break
 
-    path_dict = (
+    processed_path = (
         path.absolute().relative_to(dir_format.path.absolute())
         if relative
         else path.absolute()
     )
-    return str(path_dict), _id
\ No newline at end of file
+    return str(processed_path), _id
\ No newline at end of file
diff --git a/q2_types/tests/data/not_per_sample/some_file b/q2_types/tests/data/not_per_sample/some_file
new file mode 100644
index 0000000..e69de29
diff --git a/q2_types/tests/data/per_sample/some_file b/q2_types/tests/data/per_sample/some_file
new file mode 100644
index 0000000..e69de29
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 7e473a3..772121d 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -56,13 +56,18 @@ def test_validate_mag_ids_invalid(self):
 class TestFileDictMixing(TestPluginBase):
     package = "q2_types.tests"
 
-    def test_file_dict_mixin(self):
-        TestClass = type(
+    def setUp(self):
+        super().setUp()
+
+        self.TestClass = type(
             f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}",
             (FileDictMixin, model.DirectoryFormat),
             {}
         )
-        fmt = TestClass(self.get_data_path("per_sample"), mode='r')
+        self.TestClass.pathspec = r'.+\.(txt|tsv)$'
+
+    def test_file_dict_mixin(self):
+        fmt = self.TestClass(self.get_data_path("per_sample"), mode='r')
 
         obs = fmt.file_dict(suffixes=["_suffix"])
         exp = {
@@ -87,12 +92,7 @@ def test_file_dict_mixin(self):
         self.assertDictEqual(obs, exp)
 
     def test_genes_dirfmt_genome_dict(self):
-        TestClass = type(
-            f"{model.DirectoryFormat.__name__}With{FileDictMixin.__name__}",
-            (FileDictMixin, model.DirectoryFormat),
-            {}
-        )
-        fmt = TestClass(self.get_data_path("not_per_sample"), mode='r')
+        fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r')
 
         obs = fmt.file_dict(suffixes=["_suffix1", "_suffix2"])
         exp = {

From daf2866758a8eb41a114f975d905b84989c6b544 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 13:58:56 +0200
Subject: [PATCH 11/22] lint

---
 q2_types/_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index e05ff73..58de578 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -241,4 +241,4 @@ def _process_path(path, relative, dir_format, suffixes):
         if relative
         else path.absolute()
     )
-    return str(processed_path), _id
\ No newline at end of file
+    return str(processed_path), _id

From 46958562e726ee5aa6ff5c0090468efa8af05453 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 14:32:23 +0200
Subject: [PATCH 12/22] added pathspec to kraken formats

---
 q2_types/kraken2/_formats.py              | 10 ++++------
 q2_types/per_sample_sequences/_formats.py |  5 +++--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py
index 38c1583..f406ac4 100644
--- a/q2_types/kraken2/_formats.py
+++ b/q2_types/kraken2/_formats.py
@@ -69,9 +69,8 @@ def _validate_(self, level):
 
 
 class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin):
-    reports = model.FileCollection(
-        r'.+report\.(txt|tsv)$', format=Kraken2ReportFormat
-    )
+    pathspec = r'.+report\.(txt|tsv)$'
+    reports = model.FileCollection(pathspec, format=Kraken2ReportFormat)
 
     @reports.set_path_maker
     def reports_path_maker(self, sample_id, mag_id=None):
@@ -148,9 +147,8 @@ def _validate_(self, level):
 
 
 class Kraken2OutputDirectoryFormat(model.DirectoryFormat, FileDictMixin):
-    reports = model.FileCollection(
-        r'.+output\.(txt|tsv)$', format=Kraken2OutputFormat
-    )
+    pathspec = r'.+output\.(txt|tsv)$'
+    reports = model.FileCollection(pathspec, format=Kraken2OutputFormat)
 
     @reports.set_path_maker
     def reports_path_maker(self, sample_id, mag_id=None):
diff --git a/q2_types/per_sample_sequences/_formats.py b/q2_types/per_sample_sequences/_formats.py
index 17bb3a4..4973821 100644
--- a/q2_types/per_sample_sequences/_formats.py
+++ b/q2_types/per_sample_sequences/_formats.py
@@ -25,7 +25,7 @@
 from q2_types.bowtie2 import Bowtie2IndexDirFmt
 from q2_types.feature_data import DNAFASTAFormat
 from ._util import _parse_sequence_filename, _manifest_to_df
-from .._util import FastqGzFormat
+from .._util import FastqGzFormat, FileDictMixin
 
 
 class FastqAbsolutePathManifestFormatV2(model.TextFileFormat):
@@ -536,7 +536,8 @@ def _validate_(self, level):
 
 
 class MultiFASTADirectoryFormat(MultiDirValidationMixin,
-                                model.DirectoryFormat):
+                                model.DirectoryFormat,
+                                FileDictMixin):
     pathspec = r'.+\.(fa|fasta)$'
     sequences = model.FileCollection(pathspec, format=DNAFASTAFormat)
 

From 2a8955605a4895e369f0d0dade3988eb0a6d195d Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 15:30:49 +0200
Subject: [PATCH 13/22] removed genomedirectory format and fixed pathspec error
 in proteins

---
 q2_types/genome_data/__init__.py           |  3 +-
 q2_types/genome_data/_formats.py           | 69 ++++------------------
 q2_types/genome_data/tests/test_formats.py | 18 +++---
 3 files changed, 23 insertions(+), 67 deletions(-)

diff --git a/q2_types/genome_data/__init__.py b/q2_types/genome_data/__init__.py
index 40283a8..1de55bd 100644
--- a/q2_types/genome_data/__init__.py
+++ b/q2_types/genome_data/__init__.py
@@ -11,7 +11,6 @@
     GenesDirectoryFormat, ProteinsDirectoryFormat, LociDirectoryFormat,
     GFF3Format, OrthologFileFmt, SeedOrthologDirFmt,
     GenomeSequencesDirectoryFormat, OrthologAnnotationDirFmt,
-    GenomeDataDirectoryFormat,
 )
 from ._objects import IntervalMetadataIterator
 from ._types import (
@@ -25,6 +24,6 @@
     'GenesDirectoryFormat', 'ProteinsDirectoryFormat', 'LociDirectoryFormat',
     'IntervalMetadataIterator', 'OrthologFileFmt', 'Orthologs',
     'SeedOrthologDirFmt', 'GenomeSequencesDirectoryFormat', 'DNASequence',
-    'OrthologAnnotationDirFmt', 'NOG', 'GenomeDataDirectoryFormat',
+    'OrthologAnnotationDirFmt', 'NOG',
     'collate_orthologs', 'partition_orthologs', "collate_ortholog_annotations"
     ]
diff --git a/q2_types/genome_data/_formats.py b/q2_types/genome_data/_formats.py
index 15bbaf7..456ebe6 100644
--- a/q2_types/genome_data/_formats.py
+++ b/q2_types/genome_data/_formats.py
@@ -11,6 +11,7 @@
 import qiime2.plugin.model as model
 from qiime2.plugin import ValidationError
 
+from q2_types._util import FileDictMixin
 from q2_types.feature_data import DNAFASTAFormat, ProteinFASTAFormat
 
 
@@ -19,63 +20,18 @@ def _validate_(self, level):
         pass
 
 
-class GenomeDataDirectoryFormat(model.DirectoryFormat):
-    def genome_dict(self, relative=False):
-        """
-        For per sample directories it returns a mapping of sample id to
-        another dictionary where keys represent the file name and values
-        correspond to the filepath for each file.
-        For files, it returns a mapping of file name to filepath for each file.
-
-        Parameters
-        ---------
-        relative : bool
-            Whether to return filepaths relative to the directory's location.
-            Returns absolute filepaths by default.
-
-        Returns
-        -------
-        dict
-            Mapping of filename -> filepath as described above.
-            Or mapping of sample id -> dict {filename: filepath} as
-            described above.
-            Both levels of the dictionary are sorted alphabetically by key.
-        """
-        ids = defaultdict(dict)
-        for entry in self.path.iterdir():
-            if entry.is_dir():
-                sample_id = entry.name
-                for path in entry.iterdir():
-                    file_name = path.stem
-                    file_path = (
-                        path.absolute().relative_to(self.path.absolute())
-                        if relative else path.absolute()
-                    )
-                    ids[sample_id][file_name] = str(file_path)
-                ids[sample_id] = dict(sorted(ids[sample_id].items()))
-            else:
-                file_name = entry.stem
-                file_path = (
-                    entry.absolute().relative_to(self.path.absolute())
-                    if relative else entry.absolute()
-                )
-                ids[file_name] = str(file_path)
-
-        return dict(sorted(ids.items()))
-
-
-class GenesDirectoryFormat(GenomeDataDirectoryFormat):
-    genes = model.FileCollection(r'.+\.(fa|fna|fasta)$',
-                                 format=DNAFASTAFormat)
+class GenesDirectoryFormat(model.DirectoryFormat, FileDictMixin):
+    pathspec = r'.+\.(fa|fna|fasta)$'
+    genes = model.FileCollection(pathspec, format=DNAFASTAFormat)
 
     @genes.set_path_maker
     def genes_path_maker(self, genome_id):
         return '%s.fasta' % genome_id
 
 
-class ProteinsDirectoryFormat(GenomeDataDirectoryFormat):
-    proteins = model.FileCollection(r'.+\.(fa|faa|fasta)$',
-                                    format=ProteinFASTAFormat)
+class ProteinsDirectoryFormat(model.DirectoryFormat, FileDictMixin):
+    pathspec = r'.+\.(fa|faa|fasta)$'
+    proteins = model.FileCollection(pathspec, format=ProteinFASTAFormat)
 
     @proteins.set_path_maker
     def proteins_path_maker(self, genome_id):
@@ -205,17 +161,18 @@ def _validate_(self, level):
                                       f'{line_number}') from e
 
 
-class LociDirectoryFormat(GenomeDataDirectoryFormat):
-    loci = model.FileCollection(r'.+\.gff$',
-                                format=GFF3Format)
+class LociDirectoryFormat(model.DirectoryFormat, FileDictMixin):
+    pathspec = r'.+\.gff$'
+    loci = model.FileCollection(pathspec, format=GFF3Format)
 
     @loci.set_path_maker
     def loci_path_maker(self, genome_id):
         return '%s.gff' % genome_id
 
 
-class GenomeSequencesDirectoryFormat(GenomeDataDirectoryFormat):
-    genomes = model.FileCollection(r'.+\.(fasta|fa)$', format=DNAFASTAFormat)
+class GenomeSequencesDirectoryFormat(model.DirectoryFormat, FileDictMixin):
+    pathspec = r'.+\.(fasta|fa)$'
+    genomes = model.FileCollection(pathspec, format=DNAFASTAFormat)
 
     @genomes.set_path_maker
     def genomes_path_maker(self, genome_id):
diff --git a/q2_types/genome_data/tests/test_formats.py b/q2_types/genome_data/tests/test_formats.py
index 0b6e3c9..9f37403 100644
--- a/q2_types/genome_data/tests/test_formats.py
+++ b/q2_types/genome_data/tests/test_formats.py
@@ -180,17 +180,17 @@ def test_ortholog_annotations_annot_dict(self):
         }
         self.assertDictEqual(obs, exp)
 
-    def test_genome_data_dirfmt_samples_genome_dict(self):
-        genes = GenomeDataDirectoryFormat(
+    def test_genes_dirfmt_samples_file_dict(self):
+        genes = GenesDirectoryFormat(
             self.get_data_path('genes_samples'), mode='r')
 
-        obs = genes.genome_dict()
+        obs = genes.file_dict()
         exp = {
             'sample1': {
-                'genes1': str(Path(genes.path / 'sample1/genes1.fa')),
+                'genes1': str(genes.path / 'sample1/genes1.fa'),
             },
             'sample2': {
-                'genes2': str(Path(genes.path / 'sample2/genes2.fa')),
+                'genes2': str(genes.path / 'sample2/genes2.fa'),
             },
         }
         self.assertDictEqual(obs, exp)
@@ -206,15 +206,15 @@ def test_genome_data_dirfmt_samples_genome_dict(self):
         }
         self.assertDictEqual(obs, exp)
 
-    def test_genes_dirfmt_genome_dict(self):
+    def test_genes_dirfmt_file_dict(self):
         genes = (
-            GenomeDataDirectoryFormat(self.get_data_path('genes'), mode='r')
+            GenesDirectoryFormat(self.get_data_path('genes'), mode='r')
         )
 
         obs = genes.genome_dict()
         exp = {
-            'genes1': str(Path(genes.path / 'genes1.fa')),
-            'genes2': str(Path(genes.path / 'genes2.fa'))
+            'genes1': str(genes.path / 'genes1.fa'),
+            'genes2': str(genes.path / 'genes2.fa')
         }
         self.assertDictEqual(obs, exp)
 

From 1c8c25ba647fd24e4894c2d927f90685391ed73f Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 15:31:55 +0200
Subject: [PATCH 14/22] removed tests for genomedatadirfmt file dict

---
 q2_types/genome_data/tests/test_formats.py | 47 ----------------------
 1 file changed, 47 deletions(-)

diff --git a/q2_types/genome_data/tests/test_formats.py b/q2_types/genome_data/tests/test_formats.py
index 9f37403..a17685d 100644
--- a/q2_types/genome_data/tests/test_formats.py
+++ b/q2_types/genome_data/tests/test_formats.py
@@ -6,7 +6,6 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 import unittest
-from pathlib import Path
 
 from qiime2.core.exceptions import ValidationError
 from qiime2.plugin.testing import TestPluginBase
@@ -15,7 +14,6 @@
     GenesDirectoryFormat, ProteinsDirectoryFormat, GFF3Format,
     LociDirectoryFormat, SeedOrthologDirFmt, OrthologFileFmt,
     OrthologAnnotationDirFmt, GenomeSequencesDirectoryFormat,
-    GenomeDataDirectoryFormat
 )
 
 
@@ -180,51 +178,6 @@ def test_ortholog_annotations_annot_dict(self):
         }
         self.assertDictEqual(obs, exp)
 
-    def test_genes_dirfmt_samples_file_dict(self):
-        genes = GenesDirectoryFormat(
-            self.get_data_path('genes_samples'), mode='r')
-
-        obs = genes.file_dict()
-        exp = {
-            'sample1': {
-                'genes1': str(genes.path / 'sample1/genes1.fa'),
-            },
-            'sample2': {
-                'genes2': str(genes.path / 'sample2/genes2.fa'),
-            },
-        }
-        self.assertDictEqual(obs, exp)
-
-        obs = genes.genome_dict(relative=True)
-        exp = {
-            'sample1': {
-                'genes1': 'sample1/genes1.fa',
-            },
-            'sample2': {
-                'genes2': 'sample2/genes2.fa',
-            },
-        }
-        self.assertDictEqual(obs, exp)
-
-    def test_genes_dirfmt_file_dict(self):
-        genes = (
-            GenesDirectoryFormat(self.get_data_path('genes'), mode='r')
-        )
-
-        obs = genes.genome_dict()
-        exp = {
-            'genes1': str(genes.path / 'genes1.fa'),
-            'genes2': str(genes.path / 'genes2.fa')
-        }
-        self.assertDictEqual(obs, exp)
-
-        obs = genes.genome_dict(relative=True)
-        exp = {
-            'genes1': 'genes1.fa',
-            'genes2': 'genes2.fa'
-        }
-        self.assertDictEqual(obs, exp)
-
 
 if __name__ == '__main__':
     unittest.main()

From 3c282919f623b9b72c57cd6d8d6587d59d647bf7 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 15:33:46 +0200
Subject: [PATCH 15/22] removed filemixin from mags

---
 q2_types/per_sample_sequences/_formats.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/q2_types/per_sample_sequences/_formats.py b/q2_types/per_sample_sequences/_formats.py
index 4973821..d762f1f 100644
--- a/q2_types/per_sample_sequences/_formats.py
+++ b/q2_types/per_sample_sequences/_formats.py
@@ -536,8 +536,7 @@ def _validate_(self, level):
 
 
 class MultiFASTADirectoryFormat(MultiDirValidationMixin,
-                                model.DirectoryFormat,
-                                FileDictMixin):
+                                model.DirectoryFormat):
     pathspec = r'.+\.(fa|fasta)$'
     sequences = model.FileCollection(pathspec, format=DNAFASTAFormat)
 

From e77d66f36328ea32e84bd43d0b3ca6a43f151df3 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Thu, 17 Oct 2024 15:58:01 +0200
Subject: [PATCH 16/22] lint

---
 q2_types/genome_data/_formats.py          | 1 -
 q2_types/per_sample_sequences/_formats.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/q2_types/genome_data/_formats.py b/q2_types/genome_data/_formats.py
index 456ebe6..868a9a2 100644
--- a/q2_types/genome_data/_formats.py
+++ b/q2_types/genome_data/_formats.py
@@ -6,7 +6,6 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 import re
-from collections import defaultdict
 
 import qiime2.plugin.model as model
 from qiime2.plugin import ValidationError
diff --git a/q2_types/per_sample_sequences/_formats.py b/q2_types/per_sample_sequences/_formats.py
index d762f1f..17bb3a4 100644
--- a/q2_types/per_sample_sequences/_formats.py
+++ b/q2_types/per_sample_sequences/_formats.py
@@ -25,7 +25,7 @@
 from q2_types.bowtie2 import Bowtie2IndexDirFmt
 from q2_types.feature_data import DNAFASTAFormat
 from ._util import _parse_sequence_filename, _manifest_to_df
-from .._util import FastqGzFormat, FileDictMixin
+from .._util import FastqGzFormat
 
 
 class FastqAbsolutePathManifestFormatV2(model.TextFileFormat):

From d6d80ee724a30705ea6663da7e5c05a78f17824f Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Wed, 5 Feb 2025 13:46:41 +0100
Subject: [PATCH 17/22] changes after review

---
 q2_types/_util.py           | 23 ++++++++++++-----------
 q2_types/tests/test_util.py |  6 ++++--
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index f724c07..9af74b8 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -143,34 +143,33 @@ def _validate_mag_ids(
 
 
 class FileDictMixin:
-    def file_dict(self, relative=False, suffixes=None):
+    def file_dict(self, relative=False):
         """
         For per sample directories it returns a mapping of sample id to
         another dictionary where keys represent the file name and values
         correspond to the filepath for each file matching the pathspec.
         For files, it returns a mapping of file name to filepath for each
-        file matching the pathspec. The specified suffixes are removed
-        from filenames.
+        file matching the pathspec. If the dir format has the attribute
+        'suffixes', then these are removed from filenames.
 
         Parameters
         ---------
         relative : bool
             Whether to return filepaths relative to the directory's location.
             Returns absolute filepaths by default.
-        suffixes : List
-            A list of suffixes that should be removed from the filenames to
-            generate the ID.
 
         Returns
         -------
         dict
-            Mapping of filename -> filepath as described above.
+            Mapping of sample id -> filepath as described above.
             Or mapping of sample id -> dict {filename: filepath} as
             described above.
             Both levels of the dictionary are sorted alphabetically by key.
         """
+        suffixes = getattr(self, "suffixes", [])
         file_pattern = re.compile(self.pathspec)
         ids = defaultdict(dict)
+
         for entry in self.path.iterdir():
             if entry.is_dir():
                 outer_id = entry.name
@@ -184,7 +183,7 @@ def file_dict(self, relative=False, suffixes=None):
                             suffixes=suffixes,
                         )
 
-                        ids[outer_id][inner_id] = str(file_path)
+                        ids[outer_id][inner_id] = file_path
                 ids[outer_id] = dict(sorted(ids[outer_id].items()))
             else:
                 if file_pattern.match(entry.name):
@@ -196,7 +195,7 @@ def file_dict(self, relative=False, suffixes=None):
                         suffixes=suffixes,
                     )
 
-                    ids[inner_id] = str(file_path)
+                    ids[inner_id] = file_path
 
         return dict(sorted(ids.items()))
 
@@ -217,17 +216,19 @@ def _process_path(path, relative, dir_format, suffixes):
             to the directory formats path or absolute.
         dir_format : model.DirectoryFormat.
             Any object of class model.DirectoryFormat.
+        suffixes : List
+            A list of suffixes that should be removed from the filenames to
+            generate the ID.
 
     Returns:
     -------
         processed_path : str
-            The full relative or absolut path to the file.
+            The full relative or absolute path to the file.
         _id : str
             The ID derived from the file name. ID will be "" if the filename
             consists only of the suffix.
     """
     file_name = path.stem
-
     _id = file_name
 
     if suffixes:
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 110a47e..6e75077 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -67,9 +67,10 @@ def setUp(self):
         self.TestClass.pathspec = r'.+\.(txt|tsv)$'
 
     def test_file_dict_mixin(self):
+        self.TestClass.suffixes = ["_suffix"]
         fmt = self.TestClass(self.get_data_path("per_sample"), mode='r')
 
-        obs = fmt.file_dict(suffixes=["_suffix"])
+        obs = fmt.file_dict()
         exp = {
             "sample1": {
                 "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"),
@@ -92,9 +93,10 @@ def test_file_dict_mixin(self):
         self.assertDictEqual(obs, exp)
 
     def test_genes_dirfmt_genome_dict(self):
+        self.TestClass.suffixes = ["_suffix1", "_suffix2"]
         fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r')
 
-        obs = fmt.file_dict(suffixes=["_suffix1", "_suffix2"])
+        obs = fmt.file_dict()
         exp = {
             "id1": os.path.join(str(fmt), "id1_suffix1.txt"),
             "id2": os.path.join(str(fmt), "id2_suffix2.txt"),

From b567c5a4a43947488a3761bbc6242d2e366db110 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Fri, 7 Feb 2025 15:13:47 +0100
Subject: [PATCH 18/22] added suffixes attribute to the kraken formats

---
 q2_types/kraken2/_formats.py           | 2 ++
 q2_types/kraken2/tests/test_formats.py | 1 +
 q2_types/tests/test_util.py            | 7 ++-----
 setup.py                               | 0
 4 files changed, 5 insertions(+), 5 deletions(-)
 delete mode 100644 setup.py

diff --git a/q2_types/kraken2/_formats.py b/q2_types/kraken2/_formats.py
index f406ac4..2c0d0e0 100644
--- a/q2_types/kraken2/_formats.py
+++ b/q2_types/kraken2/_formats.py
@@ -70,6 +70,7 @@ def _validate_(self, level):
 
 class Kraken2ReportDirectoryFormat(model.DirectoryFormat, FileDictMixin):
     pathspec = r'.+report\.(txt|tsv)$'
+    suffixes = ['.report']
     reports = model.FileCollection(pathspec, format=Kraken2ReportFormat)
 
     @reports.set_path_maker
@@ -148,6 +149,7 @@ def _validate_(self, level):
 
 class Kraken2OutputDirectoryFormat(model.DirectoryFormat, FileDictMixin):
     pathspec = r'.+output\.(txt|tsv)$'
+    suffixes = ['.output']
     reports = model.FileCollection(pathspec, format=Kraken2OutputFormat)
 
     @reports.set_path_maker
diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py
index 038eede..adce9d0 100644
--- a/q2_types/kraken2/tests/test_formats.py
+++ b/q2_types/kraken2/tests/test_formats.py
@@ -85,6 +85,7 @@ def test_db_report_format_wrong_types(self):
     def test_report_dirfmt_from_reads(self):
         dirpath = self.get_data_path('reports-reads')
         fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
+        a = fmt.file_dict()
         fmt.validate()
 
     def test_report_dirfmt_from_mags(self):
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 6e75077..5dae298 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -81,7 +81,7 @@ def test_file_dict_mixin(self):
         }
         self.assertDictEqual(obs, exp)
 
-        obs = fmt.file_dict(suffixes=["_suffix"], relative=True)
+        obs = fmt.file_dict(relative=True)
         exp = {
             "sample1": {
                 "id1": "sample1/id1_suffix.txt",
@@ -103,10 +103,7 @@ def test_genes_dirfmt_genome_dict(self):
         }
         self.assertDictEqual(obs, exp)
 
-        obs = fmt.file_dict(
-            suffixes=["_suffix1", "_suffix2"],
-            relative=True
-        )
+        obs = fmt.file_dict(relative=True)
         exp = {
             "id1": "id1_suffix1.txt",
             "id2": "id2_suffix2.txt",
diff --git a/setup.py b/setup.py
deleted file mode 100644
index e69de29..0000000

From 123faa7b3a04c4a25be1083395d0d3f92b8aa26b Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Fri, 7 Feb 2025 15:30:09 +0100
Subject: [PATCH 19/22] added test for filedictmixin with krakenoutput format

---
 .../sample1/bin1.output.txt                   | 32 +++++++++++++++++
 .../sample1/bin2.output.txt                   | 32 +++++++++++++++++
 .../sample2/bin3.output.txt                   | 32 +++++++++++++++++
 q2_types/tests/test_util.py                   | 34 +++++++++++++++++--
 4 files changed, 128 insertions(+), 2 deletions(-)
 create mode 100755 q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt
 create mode 100755 q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt
 create mode 100755 q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt

diff --git a/q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt b/q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt
new file mode 100755
index 0000000..21a5992
--- /dev/null
+++ b/q2_types/tests/data/kraken-outputs-mags/sample1/bin1.output.txt
@@ -0,0 +1,32 @@
+C	k119_33069	1912795	10855	1912795:Q
+C	k119_55515	1583098	5698	1583098:Q
+C	k119_66468	1323375	5173	1323375:Q
+C	k119_33506	182217	17101	182217:Q
+C	k119_22814	1472	19997	1472:Q
+C	k119_23274	29388	23523	29388:Q
+C	k119_45180	545501	25821	545501:Q
+C	k119_34380	1218	4423	1218:Q
+C	k119_1654	2518177	31450	2518177:Q
+C	k119_45407	221027	2908	221027:Q
+C	k119_12788	59919	2856	59919:Q
+U	k119_34900	0	3045	0:Q
+C	k119_45855	851	19053	851:Q
+C	k119_90411	2647897	2589	2647897:Q
+C	k119_57806	2653681	4515	2653681:Q
+C	k119_58481	131567	19174	131567:Q
+C	k119_47669	2682541	11848	2682541:Q
+C	k119_59208	1977865	3665	1977865:Q
+C	k119_16398	2770780	5030	2770780:Q
+C	k119_60835	400634	2807	400634:Q
+C	k119_49584	2490633	6493	2490633:Q
+C	k119_28869	111780	8356	111780:Q
+C	k119_94747	2305987	3774	2305987:Q
+C	k119_40414	983544	27806	983544:Q
+C	k119_73618	2563896	3473	2563896:Q
+C	k119_84540	332101	3409	332101:Q
+C	k119_73768	2593542	29942	2593542:Q
+C	k119_41848	34105	8793	34105:Q
+C	k119_43035	1301	4680	1301:Q
+C	k119_65066	1547445	10430	1547445:Q
+C	k119_10361	491950	68731	491950:Q
+C	k119_10711	52959	8685	52959:Q
diff --git a/q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt b/q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt
new file mode 100755
index 0000000..21a5992
--- /dev/null
+++ b/q2_types/tests/data/kraken-outputs-mags/sample1/bin2.output.txt
@@ -0,0 +1,32 @@
+C	k119_33069	1912795	10855	1912795:Q
+C	k119_55515	1583098	5698	1583098:Q
+C	k119_66468	1323375	5173	1323375:Q
+C	k119_33506	182217	17101	182217:Q
+C	k119_22814	1472	19997	1472:Q
+C	k119_23274	29388	23523	29388:Q
+C	k119_45180	545501	25821	545501:Q
+C	k119_34380	1218	4423	1218:Q
+C	k119_1654	2518177	31450	2518177:Q
+C	k119_45407	221027	2908	221027:Q
+C	k119_12788	59919	2856	59919:Q
+U	k119_34900	0	3045	0:Q
+C	k119_45855	851	19053	851:Q
+C	k119_90411	2647897	2589	2647897:Q
+C	k119_57806	2653681	4515	2653681:Q
+C	k119_58481	131567	19174	131567:Q
+C	k119_47669	2682541	11848	2682541:Q
+C	k119_59208	1977865	3665	1977865:Q
+C	k119_16398	2770780	5030	2770780:Q
+C	k119_60835	400634	2807	400634:Q
+C	k119_49584	2490633	6493	2490633:Q
+C	k119_28869	111780	8356	111780:Q
+C	k119_94747	2305987	3774	2305987:Q
+C	k119_40414	983544	27806	983544:Q
+C	k119_73618	2563896	3473	2563896:Q
+C	k119_84540	332101	3409	332101:Q
+C	k119_73768	2593542	29942	2593542:Q
+C	k119_41848	34105	8793	34105:Q
+C	k119_43035	1301	4680	1301:Q
+C	k119_65066	1547445	10430	1547445:Q
+C	k119_10361	491950	68731	491950:Q
+C	k119_10711	52959	8685	52959:Q
diff --git a/q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt b/q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt
new file mode 100755
index 0000000..21a5992
--- /dev/null
+++ b/q2_types/tests/data/kraken-outputs-mags/sample2/bin3.output.txt
@@ -0,0 +1,32 @@
+C	k119_33069	1912795	10855	1912795:Q
+C	k119_55515	1583098	5698	1583098:Q
+C	k119_66468	1323375	5173	1323375:Q
+C	k119_33506	182217	17101	182217:Q
+C	k119_22814	1472	19997	1472:Q
+C	k119_23274	29388	23523	29388:Q
+C	k119_45180	545501	25821	545501:Q
+C	k119_34380	1218	4423	1218:Q
+C	k119_1654	2518177	31450	2518177:Q
+C	k119_45407	221027	2908	221027:Q
+C	k119_12788	59919	2856	59919:Q
+U	k119_34900	0	3045	0:Q
+C	k119_45855	851	19053	851:Q
+C	k119_90411	2647897	2589	2647897:Q
+C	k119_57806	2653681	4515	2653681:Q
+C	k119_58481	131567	19174	131567:Q
+C	k119_47669	2682541	11848	2682541:Q
+C	k119_59208	1977865	3665	1977865:Q
+C	k119_16398	2770780	5030	2770780:Q
+C	k119_60835	400634	2807	400634:Q
+C	k119_49584	2490633	6493	2490633:Q
+C	k119_28869	111780	8356	111780:Q
+C	k119_94747	2305987	3774	2305987:Q
+C	k119_40414	983544	27806	983544:Q
+C	k119_73618	2563896	3473	2563896:Q
+C	k119_84540	332101	3409	332101:Q
+C	k119_73768	2593542	29942	2593542:Q
+C	k119_41848	34105	8793	34105:Q
+C	k119_43035	1301	4680	1301:Q
+C	k119_65066	1547445	10430	1547445:Q
+C	k119_10361	491950	68731	491950:Q
+C	k119_10711	52959	8685	52959:Q
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 5dae298..0b15eee 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -8,6 +8,7 @@
 import os
 from pathlib import Path
 
+from q2_types.kraken2 import Kraken2OutputDirectoryFormat
 from qiime2.plugin import model
 from qiime2.plugin.testing import TestPluginBase
 
@@ -66,7 +67,7 @@ def setUp(self):
         )
         self.TestClass.pathspec = r'.+\.(txt|tsv)$'
 
-    def test_file_dict_mixin(self):
+    def test_file_dict_mixin_per_sample(self):
         self.TestClass.suffixes = ["_suffix"]
         fmt = self.TestClass(self.get_data_path("per_sample"), mode='r')
 
@@ -92,7 +93,7 @@ def test_file_dict_mixin(self):
         }
         self.assertDictEqual(obs, exp)
 
-    def test_genes_dirfmt_genome_dict(self):
+    def test_file_dict_mixin_per_sample_not_per_sample(self):
         self.TestClass.suffixes = ["_suffix1", "_suffix2"]
         fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r')
 
@@ -110,6 +111,35 @@ def test_genes_dirfmt_genome_dict(self):
         }
         self.assertDictEqual(obs, exp)
 
+    def test_file_dict_mixin_kraken_outputs(self):
+        fmt = Kraken2OutputDirectoryFormat(
+            self.get_data_path("kraken-outputs-mags"), mode='r'
+        )
+
+        obs = fmt.file_dict()
+        exp = {
+            "sample1": {
+                "bin1": os.path.join(str(fmt), "sample1", "bin1.output.txt"),
+                "bin2": os.path.join(str(fmt), "sample1", "bin2.output.txt"),
+            },
+            "sample2": {
+                "bin3": os.path.join(str(fmt), "sample2", "bin3.output.txt"),
+            },
+        }
+        self.assertDictEqual(obs, exp)
+
+        obs = fmt.file_dict(relative=True)
+        exp = {
+            "sample1": {
+                "bin1": "sample1/bin1.output.txt",
+                "bin2": "sample1/bin2.output.txt",
+            },
+            "sample2": {
+                "bin3": "sample2/bin3.output.txt",
+            },
+        }
+        self.assertDictEqual(obs, exp)
+
 
 class TestProcessPath(TestPluginBase):
     package = "q2_types.tests"

From ec6ac6fcef77439c2770743ec49c2bd2c559bf70 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Fri, 7 Feb 2025 15:32:41 +0100
Subject: [PATCH 20/22] removed debug print

---
 q2_types/kraken2/tests/test_formats.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/q2_types/kraken2/tests/test_formats.py b/q2_types/kraken2/tests/test_formats.py
index adce9d0..038eede 100644
--- a/q2_types/kraken2/tests/test_formats.py
+++ b/q2_types/kraken2/tests/test_formats.py
@@ -85,7 +85,6 @@ def test_db_report_format_wrong_types(self):
     def test_report_dirfmt_from_reads(self):
         dirpath = self.get_data_path('reports-reads')
         fmt = Kraken2ReportDirectoryFormat(dirpath, mode='r')
-        a = fmt.file_dict()
         fmt.validate()
 
     def test_report_dirfmt_from_mags(self):

From 8fb5a69e79efdb6810c9cec53840dcd5d9307409 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 11 Feb 2025 10:50:21 +0100
Subject: [PATCH 21/22] added process_path to the filedictmixin and changed the
 tests

---
 q2_types/_util.py                             | 94 +++++++++----------
 .../{id1_suffix.txt => id1_suffix1.txt}       |  0
 .../{id2_suffix.txt => id2_suffix1.txt}       |  0
 q2_types/tests/test_util.py                   | 57 ++++-------
 4 files changed, 64 insertions(+), 87 deletions(-)
 rename q2_types/tests/data/per_sample/sample1/{id1_suffix.txt => id1_suffix1.txt} (100%)
 rename q2_types/tests/data/per_sample/sample2/{id2_suffix.txt => id2_suffix1.txt} (100%)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index 9af74b8..8cd80a7 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -166,7 +166,6 @@ def file_dict(self, relative=False):
             described above.
             Both levels of the dictionary are sorted alphabetically by key.
         """
-        suffixes = getattr(self, "suffixes", [])
         file_pattern = re.compile(self.pathspec)
         ids = defaultdict(dict)
 
@@ -176,11 +175,9 @@ def file_dict(self, relative=False):
                 for path in entry.iterdir():
                     if file_pattern.match(path.name):
 
-                        file_path, inner_id = _process_path(
+                        file_path, inner_id = self._process_path(
                             path=path,
                             relative=relative,
-                            dir_format=self,
-                            suffixes=suffixes,
                         )
 
                         ids[outer_id][inner_id] = file_path
@@ -188,11 +185,9 @@ def file_dict(self, relative=False):
             else:
                 if file_pattern.match(entry.name):
 
-                    file_path, inner_id = _process_path(
+                    file_path, inner_id = self._process_path(
                         path=entry,
                         relative=relative,
-                        dir_format=self,
-                        suffixes=suffixes,
                     )
 
                     ids[inner_id] = file_path
@@ -200,46 +195,45 @@ def file_dict(self, relative=False):
         return dict(sorted(ids.items()))
 
 
-def _process_path(path, relative, dir_format, suffixes):
-    """
-    This function processes the input file path to generate an absolute or
-    relative path string and the ID derived from the file name. The ID is
-    extracted by removing the one of the specified suffixes from the file
-    name. If no suffixes are specified the ID is defined to be the filename.
-
-    Parameters:
-    ---------
-        path : Path
-            A Path object representing the file path to process.
-        relative : bool
-            A flag indicating whether the returned path should be relative
-            to the directory formats path or absolute.
-        dir_format : model.DirectoryFormat.
-            Any object of class model.DirectoryFormat.
-        suffixes : List
-            A list of suffixes that should be removed from the filenames to
-            generate the ID.
-
-    Returns:
-    -------
-        processed_path : str
-            The full relative or absolute path to the file.
-        _id : str
-            The ID derived from the file name. ID will be "" if the filename
-            consists only of the suffix.
-    """
-    file_name = path.stem
-    _id = file_name
-
-    if suffixes:
-        for suffix in suffixes:
-            if file_name.endswith(suffix):
-                _id = file_name[:-len(suffix)]
-                break
-
-    processed_path = (
-        path.absolute().relative_to(dir_format.path.absolute())
-        if relative
-        else path.absolute()
-    )
-    return str(processed_path), _id
+    def _process_path(self, path, relative=False):
+        """
+        This function processes the input file path to generate an absolute or
+        relative path string and the ID derived from the file name. The ID is
+        extracted by removing the one of the specified suffixes from the file
+        name. If the class does not have a suffixes attribute, then the ID is
+        defined to be the filename.
+
+        Parameters:
+        ---------
+            path : Path
+                A Path object representing the file path to process.
+            relative : bool
+                A flag indicating whether the returned path should be relative
+                to the directory formats path or absolute.
+            dir_format : model.DirectoryFormat.
+                Any object of class model.DirectoryFormat.
+
+        Returns:
+        -------
+            processed_path : str
+                The full relative or absolute path to the file.
+            _id : str
+                The ID derived from the file name. ID will be "" if the filename
+                consists only of the suffix.
+        """
+        file_name = path.stem
+        _id = file_name
+        suffixes = getattr(self, "suffixes", [])
+
+        if suffixes:
+            for suffix in suffixes:
+                if file_name.endswith(suffix):
+                    _id = file_name[:-len(suffix)]
+                    break
+
+        processed_path = (
+            path.absolute().relative_to(self.path.absolute())
+            if relative
+            else path.absolute()
+        )
+        return str(processed_path), _id
diff --git a/q2_types/tests/data/per_sample/sample1/id1_suffix.txt b/q2_types/tests/data/per_sample/sample1/id1_suffix1.txt
similarity index 100%
rename from q2_types/tests/data/per_sample/sample1/id1_suffix.txt
rename to q2_types/tests/data/per_sample/sample1/id1_suffix1.txt
diff --git a/q2_types/tests/data/per_sample/sample2/id2_suffix.txt b/q2_types/tests/data/per_sample/sample2/id2_suffix1.txt
similarity index 100%
rename from q2_types/tests/data/per_sample/sample2/id2_suffix.txt
rename to q2_types/tests/data/per_sample/sample2/id2_suffix1.txt
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 0b15eee..5e0ff7d 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -13,7 +13,7 @@
 from qiime2.plugin.testing import TestPluginBase
 
 from q2_types._util import _validate_num_partitions, _validate_mag_ids, \
-    FileDictMixin, _process_path
+    FileDictMixin
 
 
 class TestUtil(TestPluginBase):
@@ -54,7 +54,7 @@ def test_validate_mag_ids_invalid(self):
             )
 
 
-class TestFileDictMixing(TestPluginBase):
+class TestFileDictMixin(TestPluginBase):
     package = "q2_types.tests"
 
     def setUp(self):
@@ -66,18 +66,18 @@ def setUp(self):
             {}
         )
         self.TestClass.pathspec = r'.+\.(txt|tsv)$'
+        self.TestClass.suffixes = ["_suffix1", "_suffix2"]
 
     def test_file_dict_mixin_per_sample(self):
-        self.TestClass.suffixes = ["_suffix"]
         fmt = self.TestClass(self.get_data_path("per_sample"), mode='r')
 
         obs = fmt.file_dict()
         exp = {
             "sample1": {
-                "id1": os.path.join(str(fmt), "sample1", "id1_suffix.txt"),
+                "id1": os.path.join(str(fmt), "sample1", "id1_suffix1.txt"),
             },
             "sample2": {
-                "id2": os.path.join(str(fmt), "sample2", "id2_suffix.txt"),
+                "id2": os.path.join(str(fmt), "sample2", "id2_suffix1.txt"),
             },
         }
         self.assertDictEqual(obs, exp)
@@ -85,16 +85,15 @@ def test_file_dict_mixin_per_sample(self):
         obs = fmt.file_dict(relative=True)
         exp = {
             "sample1": {
-                "id1": "sample1/id1_suffix.txt",
+                "id1": "sample1/id1_suffix1.txt",
             },
             "sample2": {
-                "id2": "sample2/id2_suffix.txt",
+                "id2": "sample2/id2_suffix1.txt",
             },
         }
         self.assertDictEqual(obs, exp)
 
-    def test_file_dict_mixin_per_sample_not_per_sample(self):
-        self.TestClass.suffixes = ["_suffix1", "_suffix2"]
+    def test_file_dict_mixin_not_per_sample(self):
         fmt = self.TestClass(self.get_data_path("not_per_sample"), mode='r')
 
         obs = fmt.file_dict()
@@ -141,54 +140,38 @@ def test_file_dict_mixin_kraken_outputs(self):
         self.assertDictEqual(obs, exp)
 
 
-class TestProcessPath(TestPluginBase):
-    package = "q2_types.tests"
-
-    def setUp(self):
-        super().setUp()
-        self.dir_fmt = model.DirectoryFormat()
-
     def test_process_path_with_suffix(self):
-        # Test when the file name ends with a given suffix
-        path = Path(self.dir_fmt.path / "sample_id_suffix1.txt")
-        suffixes = ["_suffix1", "_suffix2"]
+        # Test when class does have suffixes attribute
+        test_class = self.TestClass()
+        path = Path(test_class.path / "sample_id_suffix1.txt")
 
-        result_path, result_id = _process_path(
+        result_path, result_id = test_class._process_path(
             path,
             relative=True,
-            dir_format=self.dir_fmt,
-            suffixes=suffixes
         )
 
         self.assertEqual(result_id, "sample_id")
         self.assertEqual(result_path, "sample_id_suffix1.txt")
 
     def test_process_path_without_suffix(self):
-        # Test when no suffix matches the file name
-        path = Path(self.dir_fmt.path / "sample_id.txt")
-        suffixes = ["_suffix1", "_suffix2"]
+        # Test when class does not have suffixes attribute
+        test_class = self.TestClass()
+        delattr(self.TestClass, "suffixes")
+        path = Path(test_class.path / "sample_id.txt")
 
-        result_path, result_id = _process_path(
+        result_path, result_id = test_class._process_path(
             path,
             relative=True,
-            dir_format=self.dir_fmt,
-            suffixes=suffixes
         )
 
         self.assertEqual(result_id, "sample_id")
         self.assertEqual(result_path, "sample_id.txt")
 
     def test_process_path_absolute(self):
-        # Test when the relative flag is False (absolute path is returned)
-        path = Path(self.dir_fmt.path / "sample_id_suffix2.txt")
-        suffixes = ["_suffix1", "_suffix2"]
+        test_class = self.TestClass()
+        path = Path(test_class.path / "sample_id_suffix1.txt")
 
-        result_path, result_id = _process_path(
-            path,
-            relative=False,
-            dir_format=self.dir_fmt,
-            suffixes=suffixes
-        )
+        result_path, result_id = test_class._process_path(path)
 
         self.assertEqual(result_id, "sample_id")
         self.assertEqual(result_path, str(path.absolute()))

From ae09d8f46e7213e1240c5f053d9d34fb2e776ab4 Mon Sep 17 00:00:00 2001
From: VinzentRisch <risch.vinzent@gmail.com>
Date: Tue, 11 Feb 2025 11:05:05 +0100
Subject: [PATCH 22/22] lint

---
 q2_types/_util.py           | 14 +++++---------
 q2_types/tests/test_util.py |  1 -
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/q2_types/_util.py b/q2_types/_util.py
index 8cd80a7..4df29eb 100644
--- a/q2_types/_util.py
+++ b/q2_types/_util.py
@@ -194,14 +194,13 @@ def file_dict(self, relative=False):
 
         return dict(sorted(ids.items()))
 
-
     def _process_path(self, path, relative=False):
         """
         This function processes the input file path to generate an absolute or
         relative path string and the ID derived from the file name. The ID is
-        extracted by removing the one of the specified suffixes from the file
-        name. If the class does not have a suffixes attribute, then the ID is
-        defined to be the filename.
+        extracted by removing one of the suffixes from the file name. If the
+        class does not have a suffixes attribute, then the ID is defined to
+        be the filename.
 
         Parameters:
         ---------
@@ -210,16 +209,13 @@ def _process_path(self, path, relative=False):
             relative : bool
                 A flag indicating whether the returned path should be relative
                 to the directory formats path or absolute.
-            dir_format : model.DirectoryFormat.
-                Any object of class model.DirectoryFormat.
 
         Returns:
         -------
             processed_path : str
-                The full relative or absolute path to the file.
+                The relative or absolute path to the file.
             _id : str
-                The ID derived from the file name. ID will be "" if the filename
-                consists only of the suffix.
+                The ID derived from the file name.
         """
         file_name = path.stem
         _id = file_name
diff --git a/q2_types/tests/test_util.py b/q2_types/tests/test_util.py
index 5e0ff7d..0c495ea 100644
--- a/q2_types/tests/test_util.py
+++ b/q2_types/tests/test_util.py
@@ -139,7 +139,6 @@ def test_file_dict_mixin_kraken_outputs(self):
         }
         self.assertDictEqual(obs, exp)
 
-
     def test_process_path_with_suffix(self):
         # Test when class does have suffixes attribute
         test_class = self.TestClass()