Skip to content

Commit

Permalink
Add table access to AbcdEvent (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebrahimebrahim committed Aug 1, 2024
1 parent e3be9ff commit e3c54ec
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"click >=8.1",
"numpy >=1.26, <=1.26.4",
"dipy >=1.9",
"pandas",
"HD_BET @ https://github.com/brain-microstructure-exploration-tools/HD-BET/archive/refs/tags/v1.0.0.zip#sha256=d48908854207b839552f2059c9cf2a48819b847bc1eb0ea4445d1d589471a1f5",
]

Expand Down
34 changes: 33 additions & 1 deletion src/abcdmicro/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

from dataclasses import dataclass
from pathlib import Path
from typing import ClassVar

import pandas as pd


@dataclass
class AbcdEvent:
"""An ABCD event -- a particular subject and time point."""
"""An ABCD event -- a particular subject and time point from a particular ABCD data release."""

subject_id: str
"""The subject GUID defined in the NIMH Data Archive, for example 'NDAR_INV00U4FTRU'"""
Expand All @@ -21,3 +24,32 @@ class AbcdEvent:
tabular_data_path: Path
"""Path to the extracted ABCD tabular data directory. This would contain subdirectories
like `core/mental-health/` with csv tables inside them."""

abcd_version: str
"""Version of the ABCD dataset release, for example '5.1'."""

_tables: ClassVar[dict[str, dict[str, pd.DataFrame]]] = {}
"""A mapping (ABCD version string) -> (relative table path) -> (loaded table)"""

def get_table(self, table_relative_path: str) -> pd.DataFrame:
    """Return the requested table, reading it from disk only on first access.

    Loaded tables are kept in the shared ``_tables`` mapping, keyed first by
    ``self.abcd_version`` and then by ``table_relative_path``. Note that
    ``tabular_data_path`` is not part of the cache key. Repeated calls with
    the same key return the very same DataFrame object that was cached.

    Args:
        table_relative_path: Location of the csv file relative to
            ``self.tabular_data_path``, for example
            'core/mental-health/mh_p_pss.csv'.

    Returns:
        The table as a pandas DataFrame indexed by the
        ('src_subject_id', 'eventname') column pair.
    """
    # One inner cache per ABCD release; create it lazily on first use.
    tables_for_version = self._tables.setdefault(self.abcd_version, {})

    # Cache hit: hand back the already-loaded frame without touching disk.
    if table_relative_path in tables_for_version:
        return tables_for_version[table_relative_path]

    # Cache miss: read the csv and remember it for subsequent callers.
    csv_file = self.tabular_data_path / table_relative_path
    loaded = pd.read_csv(csv_file, index_col=["src_subject_id", "eventname"])
    tables_for_version[table_relative_path] = loaded
    return loaded

0 comments on commit e3c54ec

Please sign in to comment.