diff --git a/docs/source/bibliography.bib b/docs/source/bibliography.bib index 0679166a0..f94d86301 100644 --- a/docs/source/bibliography.bib +++ b/docs/source/bibliography.bib @@ -86,3 +86,80 @@ @inproceedings{SB-SAT publisher = {Association for Computing Machinery}, address = "Stuttgart, Germany", } + +@article{HBN, + title={An open resource for transdiagnostic research in pediatric mental health and learning disorders}, + author={Alexander, Lindsay M. and + Escalera, Jasmine and + Ai, Lei and + Andreotti, Charissa and + Febre, Karina and + Mangone, Alexander and + Vega-Potler, Natan and + Langer, Nicolas and + Alexander, Alexis and + Kovacs, Meagan and + Litke, Shannon and + O'Hagan, Bridget and + Andersen, Jennifer and + Bronstein, Batya and + Bui, Anastasia and + Bushey, Marijayne and + Butler, Henry and + Castagna, Victoria and + Camacho, Nicolas and + Chan, Elisha and + Citera, Danielle and + Clucas, Jon and + Cohen, Samantha and + Dufek, Sarah and + Eaves, Megan and + Fradera, Brian and + Gardner, Judith and + Grant-Villegas, Natalie and + Green, Gabriella and + Gregory, Camille and + Hart, Emily and + Harris, Shana and + Horton, Megan and + Kahn, Danielle and + Kabotyanski, Katherine and + Karmel, Bernard and + Kelly, Simon P. and + Kleinman, Kayla and + Koo, Bonhwang and + Kramer, Eliza and + Lennon, Elizabeth and + Lord, Catherine and + Mantello, Ginny and + Margolis, Amy and + Merikangas, Kathleen R. and + Milham, Judith and + Minniti, Giuseppe and + Neuhaus, Rebecca and + Levine, Alexandra and + Osman, Yael and + Parra, Lucas C. and + Pugh, Ken R. and + Racanello, Amy and + Restrepo, Anita and + Saltzman, Tian and + Septimus, Batya and + Tobe, Russell and + Waltz, Rachel and + Williams, Anna and + Yeo, Anna and +Castellanos, Francisco X. and +Klein, Arno and +Paus, Tomas and +Leventhal, Bennett L. and +Craddock, R. Cameron and +Koplewicz, Harold S. 
and +Milham, Michael P.}, + journal={Scientific Data}, + volume={4}, + number={1}, + pages={1--26}, + year={2017}, + publisher={Nature Publishing Group} +} diff --git a/src/pymovements/datasets/__init__.py b/src/pymovements/datasets/__init__.py index b2f0a543e..5eda26d9d 100644 --- a/src/pymovements/datasets/__init__.py +++ b/src/pymovements/datasets/__init__.py @@ -28,6 +28,7 @@ pymovements.datasets.GazeBase pymovements.datasets.GazeBaseVR pymovements.datasets.GazeOnFaces + pymovements.datasets.HBN pymovements.datasets.JuDo1000 pymovements.datasets.SBSAT @@ -44,6 +45,7 @@ from pymovements.datasets.gaze_on_faces import GazeOnFaces from pymovements.datasets.gazebase import GazeBase from pymovements.datasets.gazebasevr import GazeBaseVR +from pymovements.datasets.hbn import HBN from pymovements.datasets.judo1000 import JuDo1000 from pymovements.datasets.sb_sat import SBSAT from pymovements.datasets.toy_dataset import ToyDataset @@ -54,6 +56,7 @@ 'GazeBase', 'GazeBaseVR', 'GazeOnFaces', + 'HBN', 'JuDo1000', 'SBSAT', 'ToyDataset', diff --git a/src/pymovements/datasets/hbn.py b/src/pymovements/datasets/hbn.py new file mode 100644 index 000000000..c976e5454 --- /dev/null +++ b/src/pymovements/datasets/hbn.py @@ -0,0 +1,151 @@ +# Copyright (c) 2022-2023 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""This module provides an interface to the HBN dataset.""" +from __future__ import annotations + +from dataclasses import dataclass +from dataclasses import field +from typing import Any + +import polars as pl + +from pymovements.dataset.dataset_definition import DatasetDefinition +from pymovements.dataset.dataset_library import register_dataset +from pymovements.gaze.experiment import Experiment + + +@dataclass +@register_dataset +class HBN(DatasetDefinition): + """HBN dataset :cite:p:`HBN`. + + This dataset consists of recordings from children + watching four different age-appropriate videos: (1) an + educational video clip (Fun with Fractals), (2) a short animated + film (The Present), (3) a short clip of an animated film (Despicable Me), + and (4) a trailer for a feature-length movie (Diary of a Wimpy Kid). + The eye gaze was recorded at a sampling rate of 120 Hz. + + Check the respective paper for details :cite:p:`HBN`. + + Attributes + ---------- + name : str + The name of the dataset. + + mirrors : tuple[str, ...] + A tuple of mirrors of the dataset. Each entry must be of type `str` and end with a '/'. + + resources : tuple[dict[str, str], ...] + A tuple of dataset resources. Each list entry must be a dictionary with the following keys: + - `resource`: The url suffix of the resource. This will be concatenated with the mirror. + - `filename`: The filename under which the file is saved as. + - `md5`: The MD5 checksum of the respective file. 
+ + experiment : Experiment + The experiment definition. + + filename_format : str + Regular expression which will be matched before trying to load the file. Named groups will + appear in the `fileinfo` dataframe. + + filename_format_dtypes : dict[str, type], optional + If named groups are present in the `filename_format`, this makes it possible to cast + specific named groups to a particular datatype. + + column_map : dict[str, str] + The keys are the columns to read, the values are the names to which they should be renamed. + + custom_read_kwargs : dict[str, Any], optional + If specified, these keyword arguments will be passed to the file reading function. + + Examples + -------- + Initialize your :py:class:`~pymovements.Dataset` object with the + :py:class:`~pymovements.datasets.HBN` definition: + + >>> import pymovements as pm + >>> + >>> dataset = pm.Dataset("HBN", path='data/HBN') + + Download the dataset resources: + + >>> dataset.download()# doctest: +SKIP + + Load the data into memory: + + >>> dataset.load()# doctest: +SKIP + """ + + # pylint: disable=similarities + # The DatasetDefinition child classes potentially share code chunks for definitions. + + name: str = 'HBN' + + mirrors: tuple[str, ...] = ( + 'https://files.osf.io/v1/resources/qknuv/providers/osfstorage/', + ) + + resources: tuple[dict[str, str], ...] 
= ( + { + 'resource': '651190031e76a453918a9971', + 'filename': 'data.zip', + 'md5': '2c523e911022ffc0eab700e34e9f7f30', + }, + ) + + experiment: Experiment = Experiment( + screen_width_px=800, + screen_height_px=600, + screen_width_cm=33.8, + screen_height_cm=27.0, + distance_cm=63.5, + origin='center', + sampling_rate=120, + ) + + filename_format: str = r'{subject_id:12}_{video_id}.csv' + + filename_format_dtypes: dict[str, type] = field( + default_factory=lambda: { + 'subject_id': str, + 'video_id': str, + }, + ) + + trial_columns: list[str] = field(default_factory=lambda: ['video_id']) + + time_column: str = 'time' + + pixel_columns: list[str] = field(default_factory=lambda: ['x_pix', 'y_pix']) + + column_map: dict[str, str] = field(default_factory=lambda: {}) + + custom_read_kwargs: dict[str, Any] = field( + default_factory=lambda: { + 'separator': ',', + 'columns': [ + 'time', 'x_pix', 'y_pix', + ], + 'dtypes': [ + pl.Float64, pl.Float64, pl.Float64, + ], + }, + ) diff --git a/src/pymovements/datasets/sb_sat.py b/src/pymovements/datasets/sb_sat.py index f6e09f968..774c1ebdb 100644 --- a/src/pymovements/datasets/sb_sat.py +++ b/src/pymovements/datasets/sb_sat.py @@ -17,7 +17,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-"""This module provides an interface to the GazeOnFaces dataset.""" +"""This module provides an interface to the SB-SAT dataset.""" from __future__ import annotations from dataclasses import dataclass diff --git a/tests/datasets/datasets_test.py b/tests/datasets/datasets_test.py index 02065e943..4d12e5d3f 100644 --- a/tests/datasets/datasets_test.py +++ b/tests/datasets/datasets_test.py @@ -32,6 +32,7 @@ pytest.param(pm.datasets.GazeBase, 'GazeBase', id='GazeBase'), pytest.param(pm.datasets.GazeBaseVR, 'GazeBaseVR', id='GazeBaseVR'), pytest.param(pm.datasets.GazeOnFaces, 'GazeOnFaces', id='GazeOnFaces'), + pytest.param(pm.datasets.HBN, 'HBN', id='HBN'), pytest.param(pm.datasets.JuDo1000, 'JuDo1000', id='JuDo1000'), pytest.param(pm.datasets.SBSAT, 'SBSAT', id='SBSAT'), ], @@ -49,6 +50,7 @@ def test_public_dataset_registered(definition_class, dataset_name): pytest.param(pm.datasets.GazeBase, id='GazeBase'), pytest.param(pm.datasets.GazeBaseVR, id='GazeBaseVR'), pytest.param(pm.datasets.GazeOnFaces, id='GazeOnFaces'), + pytest.param(pm.datasets.HBN, id='HBN'), pytest.param(pm.datasets.JuDo1000, id='JuDo1000'), pytest.param(pm.datasets.SBSAT, id='SBSAT'), ], diff --git a/tests/datasets/hbn_test.py b/tests/datasets/hbn_test.py new file mode 100644 index 000000000..7482732d9 --- /dev/null +++ b/tests/datasets/hbn_test.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""Test all functionality in pymovements.datasets.hbn.""" +from pathlib import Path + +import pytest + +import pymovements as pm + + +@pytest.mark.parametrize( + 'init_path, expected_paths', + [ + pytest.param( + '/data/set/path', + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/'), + 'download': Path('/data/set/path/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/HBN'), + 'download': Path('/data/set/path/HBN/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path', dataset='.'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/'), + 'download': Path('/data/set/path/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path', dataset='dataset'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/dataset'), + 'download': Path('/data/set/path/dataset/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path', downloads='custom_downloads'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/HBN'), + 'download': Path('/data/set/path/HBN/custom_downloads'), + }, + ), + ], +) +def test_paths(init_path, expected_paths): + dataset = pm.Dataset(pm.datasets.HBN, path=init_path) + + assert dataset.paths.root == expected_paths['root'] + assert dataset.path == expected_paths['dataset'] + assert dataset.paths.dataset == expected_paths['dataset'] + 
assert dataset.paths.downloads == expected_paths['download']