Skip to content

Commit

Permalink
Merge pull request #308 from juaml/refactor/dg-patterns
Browse files Browse the repository at this point in the history
[ENH]: Improving DataGrabber `patterns`
  • Loading branch information
synchon authored Apr 4, 2024
2 parents b2df91c + 3e89413 commit a9e799c
Show file tree
Hide file tree
Showing 26 changed files with 1,357 additions and 739 deletions.
1 change: 1 addition & 0 deletions docs/changes/newsfragments/308.enh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve :class:`.PatternDataGrabber` and :class:`.PatternDataladDataGrabber`'s ``patterns`` to enable ``space``, ``format``, ``mask_item`` and other metadata description handling via YAML by `Synchon Mandal`_
105 changes: 75 additions & 30 deletions docs/extending/datagrabber.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Now that we have our element defined, we need to think about the structure of
the dataset, mainly because the structure of the dataset will determine how
the DataGrabber needs to be implemented.

``junifer`` provides an abstract class to deal with datasets that can be thought
``junifer`` provides a concrete class to deal with datasets that can be thought of
in terms of *patterns*. A *pattern* is a string that contains placeholders that
are replaced by the actual values of the element. In our BIDS example, the path
to the T1w image of subject ``sub-01`` and session ``ses-01``, relative to the
Expand Down Expand Up @@ -98,13 +98,14 @@ Step 3: Create a Data Grabber
Option A: Extending from PatternDataGrabber
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The :class:`.PatternDataGrabber` class is an abstract class that has the
The :class:`.PatternDataGrabber` class is a concrete class that has built-in
functionality for understanding patterns.

Before creating the DataGrabber, we need to define 3 variables:

* ``types``: A list with the available :ref:`data_types` in our dataset.
* ``patterns``: A dictionary that specifies the pattern for each data type.
* ``patterns``: A dictionary that specifies the pattern and some additional
information for each data type.
* ``replacements``: A list indicating which of the elements in the patterns
should be replaced by the values of the element.

Expand All @@ -114,8 +115,14 @@ For example, in our BIDS example, the variables will be:
types = ["T1w", "BOLD"]
patterns = {
"T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"T1w": {
"pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
replacements = ["subject", "session"]
Expand All @@ -141,8 +148,14 @@ With the variables defined above, we can create our DataGrabber and name it
def __init__(self, datadir: str | Path) -> None:
types = ["T1w", "BOLD"]
patterns = {
"T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"T1w": {
"pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
replacements = ["subject", "session"]
super().__init__(
Expand Down Expand Up @@ -171,8 +184,14 @@ use the :func:`.register_datagrabber` decorator.
def __init__(self, datadir: str | Path) -> None:
types = ["T1w", "BOLD"]
patterns = {
"T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"T1w": {
"pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
replacements = ["subject", "session"]
super().__init__(
Expand Down Expand Up @@ -252,8 +271,14 @@ And we can create our DataGrabber:
def __init__(self) -> None:
types = ["T1w", "BOLD"]
patterns = {
"T1w": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"BOLD": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"T1w": {
"pattern": "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"pattern": "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
replacements = ["subject", "session"]
uri = "https://gin.g-node.org/juaml/datalad-example-bids"
Expand All @@ -277,13 +302,17 @@ This approach can be used directly from the YAML, like so:
- BOLD
- T1w
patterns:
BOLD: "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz"
T1w: "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz"
BOLD:
pattern: "{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz"
space: MNI152NLin6Asym
T1w:
pattern: "{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz"
space: native
replacements:
- subject
- session
uri: "https://gin.g-node.org/juaml/datalad-example-bids"
rootdir: "example_bids_ses"
rootdir: example_bids_ses
.. _extending_datagrabbers_base:

Expand Down Expand Up @@ -314,10 +343,16 @@ and ``session``, we will use them as parameters of ``get_item``:

.. code-block:: python
def get_item(self, subject: str, session: str) -> dict[str, str]:
def get_item(self, subject: str, session: str) -> dict[str, dict[str, str]]:
out = {
"T1w": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"BOLD": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"T1w": {
"path": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"path": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
return out
Expand Down Expand Up @@ -367,12 +402,18 @@ So, to summarise, our DataGrabber will look like this:
@register_datagrabber
class ExampleBIDSDataGrabber(BaseDataGrabber):
def get_item(self, subject: str, session: str) -> dict[str, str]:
def get_item(self, subject: str, session: str) -> dict[str, dict[str, str]]:
out = {
"T1w": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"BOLD": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"T1w": {
"path": f"{subject}/{session}/anat/{subject}_{session}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"path": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
return out
return out
def get_elements(self) -> list[str]:
subjects = ["sub-01", "sub-02", "sub-03"]
Expand Down Expand Up @@ -438,16 +479,20 @@ this:
self, subject: str, session: str
) -> dict:
out = {
"BOLD": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"BOLD": {
"path": f"{subject}/{session}/func/{subject}_{session}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
"BOLD_confounds": {
"path": f"{subject}/{session}/func/{subject}_{session}_confounds.tsv",
"format": "adhoc",
"mappings": {
"fmriprep": {
"variable1": "rot_x",
"variable2": "rot_z",
"variable3": "rot_y",
}
"path": f"{subject}/{session}/func/{subject}_{session}_confounds.tsv",
"format": "adhoc",
"mappings": {
"fmriprep": {
"variable1": "rot_x",
"variable2": "rot_z",
"variable3": "rot_y",
},
},
},
}
Expand Down
3 changes: 3 additions & 0 deletions docs/understanding/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ Data Types
* - ``T1w``
- T1w image (3D)
- Preprocessed or Raw T1w image
* - ``T2w``
- T2w image (3D)
- Preprocessed or Raw T2w image
* - ``BOLD``
- BOLD image (4D)
- Preprocessed or Denoised BOLD image (fMRIPrep output)
Expand Down
10 changes: 8 additions & 2 deletions examples/run_datagrabber_bids_datalad.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@
# replaced in the patterns.
types = ["T1w", "BOLD"]
patterns = {
"T1w": "{subject}/anat/{subject}_T1w.nii.gz",
"BOLD": "{subject}/func/{subject}_task-rest_bold.nii.gz",
"T1w": {
"pattern": "{subject}/anat/{subject}_T1w.nii.gz",
"space": "native",
},
"BOLD": {
"pattern": "{subject}/func/{subject}_task-rest_bold.nii.gz",
"space": "MNI152NLin6Asym",
},
}
replacements = ["subject"]
###############################################################################
Expand Down
7 changes: 6 additions & 1 deletion junifer/configs/juseless/datagrabbers/aomic_id1000_vbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ def __init__(self, datadir: Union[str, Path, None] = None) -> None:
types = ["VBM_GM"]
replacements = ["subject"]
patterns = {
"VBM_GM": "sub-{subject}/mri/mwp1sub-{subject}_run-2_T1w.nii.gz",
"VBM_GM": {
"pattern": (
"sub-{subject}/mri/mwp1sub-{subject}_run-2_T1w.nii.gz"
),
"space": "IXI549Space",
},
}
super().__init__(
types=types,
Expand Down
7 changes: 6 additions & 1 deletion junifer/configs/juseless/datagrabbers/camcan_vbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ def __init__(self, datadir: Union[str, Path, None] = None) -> None:
)
types = ["VBM_GM"]
replacements = ["subject"]
patterns = {"VBM_GM": "sub-{subject}/mri/m0wp1sub-{subject}.nii.gz"}
patterns = {
"VBM_GM": {
"pattern": "sub-{subject}/mri/m0wp1sub-{subject}.nii.gz",
"space": "IXI549Space",
},
}
super().__init__(
types=types,
datadir=datadir,
Expand Down
7 changes: 6 additions & 1 deletion junifer/configs/juseless/datagrabbers/ixi_vbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ def __init__(
types = ["VBM_GM"]
replacements = ["site", "subject"]
patterns = {
"VBM_GM": "{site}/sub-{subject}/mri/m0wp1sub-{subject}.nii.gz"
"VBM_GM": {
"pattern": (
"{site}/sub-{subject}/mri/m0wp1sub-{subject}.nii.gz"
),
"space": "IXI549Space",
},
}

# validate and/or transform 'site' input
Expand Down
66 changes: 42 additions & 24 deletions junifer/configs/juseless/datagrabbers/ucla.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,30 +70,48 @@ def __init__(
self.tasks = tasks
# The patterns
patterns = {
"BOLD": (
"sub-{subject}/func/sub-{subject}_task-{task}_bold_space-"
"MNI152NLin2009cAsym_preproc.nii.gz"
),
"BOLD_confounds": (
"sub-{subject}/func/sub-{subject}_"
"task-{task}_bold_confounds.tsv"
),
"T1w": (
"sub-{subject}/anat/sub-{subject}_"
"T1w_space-MNI152NLin2009cAsym_preproc.nii.gz"
),
"probseg_CSF": (
"sub-{subject}/anat/sub-{subject}_T1w_space-"
"MNI152NLin2009cAsym_class-CSF_probtissue.nii.gz"
),
"probseg_GM": (
"sub-{subject}/anat/sub-{subject}_T1w_space-"
"MNI152NLin2009cAsym_class-GM_probtissue.nii.gz"
),
"probseg_WM": (
"sub-{subject}/anat/sub-{subject}_T1w_space"
"-MNI152NLin2009cAsym_class-WM_probtissue.nii.gz"
),
"BOLD": {
"pattern": (
"sub-{subject}/func/sub-{subject}_task-{task}_bold_space-"
"MNI152NLin2009cAsym_preproc.nii.gz"
),
"space": "MNI152NLin2009cAsym",
},
"BOLD_confounds": {
"pattern": (
"sub-{subject}/func/sub-{subject}_"
"task-{task}_bold_confounds.tsv"
),
"format": "fmriprep",
},
"T1w": {
"pattern": (
"sub-{subject}/anat/sub-{subject}_"
"T1w_space-MNI152NLin2009cAsym_preproc.nii.gz"
),
"space": "MNI152NLin2009cAsym",
},
"probseg_CSF": {
"pattern": (
"sub-{subject}/anat/sub-{subject}_T1w_space-"
"MNI152NLin2009cAsym_class-CSF_probtissue.nii.gz"
),
"space": "MNI152NLin2009cAsym",
},
"probseg_GM": {
"pattern": (
"sub-{subject}/anat/sub-{subject}_T1w_space-"
"MNI152NLin2009cAsym_class-GM_probtissue.nii.gz"
),
"space": "MNI152NLin2009cAsym",
},
"probseg_WM": {
"pattern": (
"sub-{subject}/anat/sub-{subject}_T1w_space"
"-MNI152NLin2009cAsym_class-WM_probtissue.nii.gz"
),
"space": "MNI152NLin2009cAsym",
},
}
# Set default types
if types is None:
Expand Down
7 changes: 6 additions & 1 deletion junifer/configs/juseless/datagrabbers/ukb_vbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@ def __init__(self, datadir: Union[str, Path, None] = None) -> None:
rootdir = "m0wp1"
types = ["VBM_GM"]
replacements = ["subject", "session"]
patterns = {"VBM_GM": "m0wp1sub-{subject}_ses-{session}_T1w.nii.gz"}
patterns = {
"VBM_GM": {
"pattern": "m0wp1sub-{subject}_ses-{session}_T1w.nii.gz",
"space": "IXI549Space",
},
}
super().__init__(
types=types,
datadir=datadir,
Expand Down
Loading

0 comments on commit a9e799c

Please sign in to comment.