From e3c54ec390122da1dc1c3a8fa4a498b7009e24e4 Mon Sep 17 00:00:00 2001
From: Ebrahim Ebrahim
Date: Thu, 1 Aug 2024 11:51:23 -0400
Subject: [PATCH] Add table access to AbcdEvent (#35)

---
 pyproject.toml         |  1 +
 src/abcdmicro/event.py | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b54781a..f704cdb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
   "click >=8.1",
   "numpy >=1.26, <=1.26.4",
   "dipy >=1.9",
+  "pandas",
   "HD_BET @ https://github.com/brain-microstructure-exploration-tools/HD-BET/archive/refs/tags/v1.0.0.zip#sha256=d48908854207b839552f2059c9cf2a48819b847bc1eb0ea4445d1d589471a1f5",
 ]
 
diff --git a/src/abcdmicro/event.py b/src/abcdmicro/event.py
index b83c4d5..8a84e17 100644
--- a/src/abcdmicro/event.py
+++ b/src/abcdmicro/event.py
@@ -2,11 +2,14 @@
 
 from dataclasses import dataclass
 from pathlib import Path
+from typing import ClassVar
+
+import pandas as pd
 
 
 @dataclass
 class AbcdEvent:
-    """An ABCD event -- a particular subject and time point."""
+    """An ABCD event -- a particular subject and time point from a particular ABCD data release."""
 
     subject_id: str
     """The subject GUID defined in the NIMH Data Archive, for example 'NDAR_INV00U4FTRU'"""
@@ -21,3 +24,32 @@ class AbcdEvent:
     tabular_data_path: Path
     """Path to the extracted ABCD tabular data directory. This would contain subdirectories
     like `core/mental-health/` with csv tables inside them."""
+
+    abcd_version: str
+    """Version of the ABCD dataset release, for example '5.1'."""
+
+    _tables: ClassVar[dict[str, dict[str, pd.DataFrame]]] = {}
+    """A mapping (ABCD version string) -> (relative table path) -> (loaded table)"""
+
+    def get_table(self, table_relative_path: str) -> pd.DataFrame:
+        """Get a table, loading it from disk if it hasn't already been loaded.
+
+        Args:
+            table_relative_path: The relative path of the table from the table root directory.
+                Example: 'core/mental-health/mh_p_pss.csv'
+
+        Returns: The loaded table as a pandas DataFrame,
+            with subject ID and eventname as a multi-index.
+        """
+        if self.abcd_version not in self._tables:
+            self._tables[self.abcd_version] = {}
+        path_to_table_mapping = self._tables[self.abcd_version]
+        if table_relative_path not in path_to_table_mapping:
+            table = pd.read_csv(
+                self.tabular_data_path / table_relative_path,
+                index_col=["src_subject_id", "eventname"],
+            )
+            path_to_table_mapping[table_relative_path] = table
+        else:
+            table = path_to_table_mapping[table_relative_path]
+        return table