Merge pull request #308 from juaml/refactor/dg-patterns

[ENH]: Improving DataGrabber `patterns`
juaml · Apr 4, 2024 · a9e799c · a9e799c
2 parents b2df91c + 3e89413
commit a9e799c
Show file tree

Hide file tree

Showing 26 changed files with 1,357 additions and 739 deletions.
diff --git a/docs/changes/newsfragments/308.enh b/docs/changes/newsfragments/308.enh
@@ -0,0 +1 @@
+Improve :class:`.PatternDataGrabber` and :class:`.PatternDataladDataGrabber`'s ``patterns`` to enable ``space``, ``format``, ``mask_item`` and other metadata description handling via YAML by `Synchon Mandal`_
diff --git a/docs/extending/datagrabber.rst b/docs/extending/datagrabber.rst
@@ -61,7 +61,7 @@ Now that we have our element defined, we need to think about the structure of
 the dataset. Mainly, because the structure of the dataset will determine how
 the DataGrabber needs to be implemented.
 
-``junifer`` provides an abstract class to deal with datasets that can be thought
+``junifer`` provides a concrete class to deal with datasets that can be thought of
 in terms of *patterns*. A *pattern* is a string that contains placeholders that
 are replaced by the actual values of the element. In our BIDS example, the path
 to the T1w image of subject ``sub-01`` and session ``ses-01``, relative to the
@@ -98,13 +98,14 @@ Step 3: Create a Data Grabber
 Option A: Extending from PatternDataGrabber
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The :class:`.PatternDataGrabber` class is an abstract class that has the
+The :class:`.PatternDataGrabber` class is a concrete class with built-in
 functionality for understanding patterns.
 
 Before creating the DataGrabber, we need to define 3 variables:
 
 * ``types``: A list with the available :ref:`data_types` in our dataset.
-* ``patterns``: A dictionary that specifies the pattern for each data type.
+* ``patterns``: A dictionary that specifies the pattern and some additional
+  information for each data type.
 * ``replacements``: A list indicating which of the elements in the patterns
   should be replaced by the values of the element.
 
@@ -114,8 +115,14 @@ For example, in our BIDS example, the variables will be:
 
     types = ["T1w", "BOLD"]
     patterns = {
-       "T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
-       "BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+       "T1w": {
+           "pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
+           "space": "native",
+       },
+       "BOLD": {
+           "pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+           "space": "MNI152NLin6Asym",
+       },
     }
     replacements = ["subject", "session"]
 
@@ -141,8 +148,14 @@ With the variables defined above, we can create our DataGrabber and name it
         def __init__(self, datadir: str | Path) -> None:
             types = ["T1w", "BOLD"]
             patterns = {
-               "T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
-               "BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+               "T1w": {
+                   "pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
+                   "space": "native",
+               },
+               "BOLD": {
+                   "pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+                   "space": "MNI152NLin6Asym",
+               },
             }
             replacements = ["subject", "session"]
             super().__init__(
@@ -171,8 +184,14 @@ use the :func:`.register_datagrabber` decorator.
         def __init__(self, datadir: str | Path) -> None:
             types = ["T1w", "BOLD"]
             patterns = {
-               "T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
-               "BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+               "T1w": {
+                   "pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
+                   "space": "native",
+               },
+               "BOLD": {
+                   "pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+                   "space": "MNI152NLin6Asym",
+               },
             }
             replacements = ["subject", "session"]
             super().__init__(
@@ -252,8 +271,14 @@ And we can create our DataGrabber:
         def __init__(self) -> None:
             types = ["T1w", "BOLD"]
             patterns = {
-               "T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
-               "BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+               "T1w": {
+                   "pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
+                   "space": "native",
+               },
+               "BOLD": {
+                   "pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+                   "space": "MNI152NLin6Asym",
+               },
             }
             replacements = ["subject", "session"]
             uri = "https://gin.g-node.org/juaml/datalad-example-bids"
@@ -277,13 +302,17 @@ This approach can be used directly from the YAML, like so:
          - BOLD
          - T1w
        patterns:
-         BOLD: "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz"
-         T1w: "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz"
+         BOLD:
+           pattern: "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz"
+           space: MNI152NLin6Asym
+         T1w:
+           pattern: "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz"
+           space: native
        replacements:
          - subject
          - session
        uri: "https://gin.g-node.org/juaml/datalad-example-bids"
-       rootdir: "example_bids_ses"
+       rootdir: example_bids_ses
 
 .. _extending_datagrabbers_base:
 
@@ -314,10 +343,16 @@ and ``session``, we will use them as parameters of ``get_item``:
 
 .. code-block:: python
 
-   def get_item(self, subject: str, session: str) -> dict[str, str]:
+   def get_item(self, subject: str, session: str) -> dict[str, dict[str, str]]:
       out = {
-         "T1w": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
-         "BOLD": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+         "T1w": {
+             "path": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
+             "space": "native",
+         },
+         "BOLD": {
+             "path": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+             "space": "MNI152NLin6Asym",
+         },
       }
       return out
 
@@ -367,12 +402,18 @@ So, to summarise, our DataGrabber will look like this:
    @register_datagrabber
    class ExampleBIDSDataGrabber(BaseDataGrabber):
 
-      def get_item(self, subject: str, session: str) -> dict[str, str]:
+      def get_item(self, subject: str, session: str) -> dict[str, dict[str, str]]:
          out = {
-            "T1w": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
-            "BOLD": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+            "T1w": {
+                "path": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
+                "space": "native",
+            },
+            "BOLD": {
+                "path": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+                "space": "MNI152NLin6Asym",
+            },
          }
-      return out
+         return out
 
       def get_elements(self) -> list[str]:
          subjects = ["sub-01", "sub-02", "sub-03"]
@@ -438,16 +479,20 @@ this:
       self, subject: str, session: str
    ) -> dict:
       out = {
-         "BOLD": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+         "BOLD": {
+             "path": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
+             "space": "MNI152NLin6Asym",
+         },
          "BOLD_confounds": {
-            "path": f"{subject}/{session}/func/{subject}_{session}_confounds.tsv",
-            "format": "adhoc",
-            "mappings": {
-               "fmriprep": {
-                  "variable1": "rot_x",
-                  "variable2": "rot_z",
-                  "variable3": "rot_y",
-               }
+             "path": f"{subject}/{session}/func/{subject}_{session}_confounds.tsv",
+             "format": "adhoc",
+             "mappings": {
+                 "fmriprep": {
+                    "variable1": "rot_x",
+                    "variable2": "rot_z",
+                    "variable3": "rot_y",
+                 },
+             },
          },
       }
 

diff --git a/docs/understanding/data.rst b/docs/understanding/data.rst
@@ -103,6 +103,9 @@ Data Types
    * - ``T1w``
      - T1w image (3D)
      - Preprocessed or Raw T1w image
+   * - ``T2w``
+     - T2w image (3D)
+     - Preprocessed or Raw T2w image
    * - ``BOLD``
      - BOLD image (4D)
      - Preprocessed or Denoised BOLD image (fMRIPrep output)

diff --git a/examples/run_datagrabber_bids_datalad.py b/examples/run_datagrabber_bids_datalad.py
@@ -25,8 +25,14 @@
 # replaced in the patterns.
 types = ["T1w", "BOLD"]
 patterns = {
-    "T1w": "{subject}/anat/{subject}_T1w.nii.gz",
-    "BOLD": "{subject}/func/{subject}_task-rest_bold.nii.gz",
+    "T1w": {
+        "pattern": "{subject}/anat/{subject}_T1w.nii.gz",
+        "space": "native",
+    },
+    "BOLD": {
+        "pattern": "{subject}/func/{subject}_task-rest_bold.nii.gz",
+        "space": "MNI152NLin6Asym",
+    },
 }
 replacements = ["subject"]
 ###############################################################################

diff --git a/junifer/configs/juseless/datagrabbers/aomic_id1000_vbm.py b/junifer/configs/juseless/datagrabbers/aomic_id1000_vbm.py
@@ -31,7 +31,12 @@ def __init__(self, datadir: Union[str, Path, None] = None) -> None:
         types = ["VBM_GM"]
         replacements = ["subject"]
         patterns = {
-            "VBM_GM": "sub-{subject}/mri/mwp1sub-{subject}_run-2_T1w.nii.gz",
+            "VBM_GM": {
+                "pattern": (
+                    "sub-{subject}/mri/mwp1sub-{subject}_run-2_T1w.nii.gz"
+                ),
+                "space": "IXI549Space",
+            },
         }
         super().__init__(
             types=types,

diff --git a/junifer/configs/juseless/datagrabbers/camcan_vbm.py b/junifer/configs/juseless/datagrabbers/camcan_vbm.py
@@ -34,7 +34,12 @@ def __init__(self, datadir: Union[str, Path, None] = None) -> None:
         )
         types = ["VBM_GM"]
         replacements = ["subject"]
-        patterns = {"VBM_GM": "sub-{subject}/mri/m0wp1sub-{subject}.nii.gz"}
+        patterns = {
+            "VBM_GM": {
+                "pattern": "sub-{subject}/mri/m0wp1sub-{subject}.nii.gz",
+                "space": "IXI549Space",
+            },
+        }
         super().__init__(
             types=types,
             datadir=datadir,

diff --git a/junifer/configs/juseless/datagrabbers/ixi_vbm.py b/junifer/configs/juseless/datagrabbers/ixi_vbm.py
@@ -43,7 +43,12 @@ def __init__(
         types = ["VBM_GM"]
         replacements = ["site", "subject"]
         patterns = {
-            "VBM_GM": "{site}/sub-{subject}/mri/m0wp1sub-{subject}.nii.gz"
+            "VBM_GM": {
+                "pattern": (
+                    "{site}/sub-{subject}/mri/m0wp1sub-{subject}.nii.gz"
+                ),
+                "space": "IXI549Space",
+            },
         }
 
         # validate and/or transform 'site' input

diff --git a/junifer/configs/juseless/datagrabbers/ucla.py b/junifer/configs/juseless/datagrabbers/ucla.py
@@ -70,30 +70,48 @@ def __init__(
         self.tasks = tasks
         # The patterns
         patterns = {
-            "BOLD": (
-                "sub-{subject}/func/sub-{subject}_task-{task}_bold_space-"
-                "MNI152NLin2009cAsym_preproc.nii.gz"
-            ),
-            "BOLD_confounds": (
-                "sub-{subject}/func/sub-{subject}_"
-                "task-{task}_bold_confounds.tsv"
-            ),
-            "T1w": (
-                "sub-{subject}/anat/sub-{subject}_"
-                "T1w_space-MNI152NLin2009cAsym_preproc.nii.gz"
-            ),
-            "probseg_CSF": (
-                "sub-{subject}/anat/sub-{subject}_T1w_space-"
-                "MNI152NLin2009cAsym_class-CSF_probtissue.nii.gz"
-            ),
-            "probseg_GM": (
-                "sub-{subject}/anat/sub-{subject}_T1w_space-"
-                "MNI152NLin2009cAsym_class-GM_probtissue.nii.gz"
-            ),
-            "probseg_WM": (
-                "sub-{subject}/anat/sub-{subject}_T1w_space"
-                "-MNI152NLin2009cAsym_class-WM_probtissue.nii.gz"
-            ),
+            "BOLD": {
+                "pattern": (
+                    "sub-{subject}/func/sub-{subject}_task-{task}_bold_space-"
+                    "MNI152NLin2009cAsym_preproc.nii.gz"
+                ),
+                "space": "MNI152NLin2009cAsym",
+            },
+            "BOLD_confounds": {
+                "pattern": (
+                    "sub-{subject}/func/sub-{subject}_"
+                    "task-{task}_bold_confounds.tsv"
+                ),
+                "format": "fmriprep",
+            },
+            "T1w": {
+                "pattern": (
+                    "sub-{subject}/anat/sub-{subject}_"
+                    "T1w_space-MNI152NLin2009cAsym_preproc.nii.gz"
+                ),
+                "space": "MNI152NLin2009cAsym",
+            },
+            "probseg_CSF": {
+                "pattern": (
+                    "sub-{subject}/anat/sub-{subject}_T1w_space-"
+                    "MNI152NLin2009cAsym_class-CSF_probtissue.nii.gz"
+                ),
+                "space": "MNI152NLin2009cAsym",
+            },
+            "probseg_GM": {
+                "pattern": (
+                    "sub-{subject}/anat/sub-{subject}_T1w_space-"
+                    "MNI152NLin2009cAsym_class-GM_probtissue.nii.gz"
+                ),
+                "space": "MNI152NLin2009cAsym",
+            },
+            "probseg_WM": {
+                "pattern": (
+                    "sub-{subject}/anat/sub-{subject}_T1w_space"
+                    "-MNI152NLin2009cAsym_class-WM_probtissue.nii.gz"
+                ),
+                "space": "MNI152NLin2009cAsym",
+            },
         }
         # Set default types
         if types is None:

diff --git a/junifer/configs/juseless/datagrabbers/ukb_vbm.py b/junifer/configs/juseless/datagrabbers/ukb_vbm.py
@@ -32,7 +32,12 @@ def __init__(self, datadir: Union[str, Path, None] = None) -> None:
         rootdir = "m0wp1"
         types = ["VBM_GM"]
         replacements = ["subject", "session"]
-        patterns = {"VBM_GM": "m0wp1sub-{subject}_ses-{session}_T1w.nii.gz"}
+        patterns = {
+            "VBM_GM": {
+                "pattern": "m0wp1sub-{subject}_ses-{session}_T1w.nii.gz",
+                "space": "IXI549Space",
+            },
+        }
         super().__init__(
             types=types,
             datadir=datadir,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Improve :class:`.PatternDataGrabber` and :class:`.PatternDataladDataGrabber`'s ``patterns`` to enable ``space``, ``format``, ``mask_item`` and other metadata description handling via YAML by `Synchon Mandal`_