feat: Add HBN dataset #576

Merged
37 commits merged on Sep 28, 2023

Changes from all commits

Commits (37)
d23b81f
added from_csv (#504)
Sep 15, 2023
bf9f7d8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 15, 2023
83e6629
Merge https://github.com/aeye-lab/pymovements into feature/gaze_from_csv
Sep 15, 2023
4397cb9
fix docstring error
Sep 15, 2023
5aa980a
Merge branch 'main' into feature/gaze_from_csv
prassepaul Sep 15, 2023
e4575b5
Merge branch 'feature/gaze_from_csv' of https://github.com/aeye-lab/p…
Sep 15, 2023
aa994f0
docstring error
Sep 15, 2023
16e3946
flake8 issue
Sep 15, 2023
1a0fb31
Merge branch 'main' into feature/gaze_from_csv
prassepaul Sep 15, 2023
493efdb
cyclic import
Sep 15, 2023
926e27c
added import to docstring
Sep 19, 2023
9b074ae
added dataset gaze_on_faces
Sep 21, 2023
bddc5f4
Merge branch 'main' into feature/feature-from_ipc
prassepaul Sep 22, 2023
c026c14
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 22, 2023
12f437a
from_ipc
Sep 22, 2023
57b65d5
Merge branch 'feature/feature-from_ipc' of https://github.com/aeye-la…
Sep 22, 2023
0370b63
error in path
Sep 22, 2023
a45c1f3
unused arguments
Sep 22, 2023
1366a75
bug fix
Sep 22, 2023
3e50091
fixed time column in feather file
Sep 22, 2023
0837f19
Merge branch 'main' into feature/feature-from_ipc
prassepaul Sep 22, 2023
6de384b
requested changes
Sep 22, 2023
8bb3047
sbsat dataset
Sep 25, 2023
6515bf6
flake8, pylint, mypy
Sep 25, 2023
0c3be30
Merge branch 'main' into feature/dataset-sbsat
prassepaul Sep 25, 2023
cc09b6f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 25, 2023
16a0b8b
bib
Sep 25, 2023
3c55d0a
Merge branch 'feature/dataset-sbsat' of https://github.com/aeye-lab/p…
Sep 25, 2023
61d7ceb
hbn dataset
Sep 25, 2023
dae4725
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 25, 2023
fdfb0c2
added group
Sep 26, 2023
faa77cc
Merge branch 'feature/dataset-hbn' of https://github.com/aeye-lab/pym…
Sep 26, 2023
e72abf2
Merge branch 'main' into feature/dataset-hbn
SiQube Sep 26, 2023
372325d
Merge branch 'main' into feature/dataset-hbn
SiQube Sep 26, 2023
db90f53
Merge branch 'main' into feature/dataset-hbn
prassepaul Sep 27, 2023
d95a5a1
requested changes
Sep 28, 2023
aa0f2a9
Merge branch 'main' into feature/dataset-hbn
prassepaul Sep 28, 2023
77 changes: 77 additions & 0 deletions docs/source/bibliography.bib
@@ -86,3 +86,80 @@ @inproceedings{SB-SAT
publisher = {Association for Computing Machinery},
address = "Stuttgart, Germany",
}

@article{HBN,
title={An open resource for transdiagnostic research in pediatric mental health and learning disorders},
author={Alexander, Lindsay M. and
Escalera, Jasmine and
Ai, Lei and
Andreotti, Charissa and
Febre, Karina and
Mangone, Alexander and
Vega-Potler, Natan and
Langer, Nicolas and
Alexander, Alexis and
Kovacs, Meagan and
Litke, Shannon and
O'Hagan, Bridget and
Andersen, Jennifer and
Bronstein, Batya and
Bui, Anastasia and
Bushey, Marijayne and
Butler, Henry and
Castagna, Victoria and
Camacho, Nicolas and
Chan, Elisha and
Citera, Danielle and
Clucas, Jon and
Cohen, Samantha and
Dufek, Sarah and
Eaves, Megan and
Fradera, Brian and
Gardner, Judith and
Grant-Villegas, Natalie and
Green, Gabriella and
Gregory, Camille and
Hart, Emily and
Harris, Shana and
Horton, Megan and
Kahn, Danielle and
Kabotyanski, Katherine and
Karmel, Bernard and
Kelly, Simon P. and
Kleinman, Kayla and
Koo, Bonhwang and
Kramer, Eliza and
Lennon, Elizabeth and
Lord, Catherine and
Mantello, Ginny and
Margolis, Amy and
Merikangas, Kathleen R. and
Milham, Judith and
Minniti, Giuseppe and
Neuhaus, Rebecca and
Levine, Alexandra and
Osman, Yael and
Parra, Lucas C. and
Pugh, Ken R. and
Racanello, Amy and
Restrepo, Anita and
Saltzman, Tian and
Septimus, Batya and
Tobe, Russell and
Waltz, Rachel and
Williams, Anna and
Yeo, Anna and
Castellanos, Francisco X. and
Klein, Arno and
Paus, Tomas and
Leventhal, Bennett L. and
Craddock, R. Cameron and
Koplewicz, Harold S. and
Milham, Michael P.},
journal={Scientific data},
volume={4},
number={1},
pages={1--26},
year={2017},
publisher={Nature Publishing Group}
}
3 changes: 3 additions & 0 deletions src/pymovements/datasets/__init__.py
@@ -28,6 +28,7 @@
pymovements.datasets.GazeBase
pymovements.datasets.GazeBaseVR
pymovements.datasets.GazeOnFaces
pymovements.datasets.HBN
pymovements.datasets.JuDo1000
pymovements.datasets.SBSAT

@@ -44,6 +45,7 @@
from pymovements.datasets.gaze_on_faces import GazeOnFaces
from pymovements.datasets.gazebase import GazeBase
from pymovements.datasets.gazebasevr import GazeBaseVR
from pymovements.datasets.hbn import HBN
from pymovements.datasets.judo1000 import JuDo1000
from pymovements.datasets.sb_sat import SBSAT
from pymovements.datasets.toy_dataset import ToyDataset
@@ -54,6 +56,7 @@
'GazeBase',
'GazeBaseVR',
'GazeOnFaces',
'HBN',
'JuDo1000',
'SBSAT',
'ToyDataset',
151 changes: 151 additions & 0 deletions src/pymovements/datasets/hbn.py
@@ -0,0 +1,151 @@
# Copyright (c) 2022-2023 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""This module provides an interface to the HBN dataset."""
from __future__ import annotations

from dataclasses import dataclass
from dataclasses import field
from typing import Any

import polars as pl

from pymovements.dataset.dataset_definition import DatasetDefinition
from pymovements.dataset.dataset_library import register_dataset
from pymovements.gaze.experiment import Experiment


@dataclass
@register_dataset
class HBN(DatasetDefinition):
"""HBN dataset :cite:p:`HBN`.

This dataset consists of recordings from children
watching four different age-appropriate videos: (1) an
educational video clip (Fun with Fractals), (2) a short animated
film (The Present), (3) a short clip of an animated film (Despicable Me),
and (4) a trailer for a feature-length movie (Diary of a Wimpy Kid).
The eye gaze was recorded at a sampling rate of 120 Hz.

Check the respective paper for details :cite:p:`HBN`.

Attributes
----------
name : str
The name of the dataset.

mirrors : tuple[str, ...]
A tuple of mirrors of the dataset. Each entry must be of type `str` and end with a '/'.

resources : tuple[dict[str, str], ...]
A tuple of dataset resources. Each entry must be a dictionary with the following keys:
- `resource`: The url suffix of the resource. This will be concatenated with the mirror.
- `filename`: The filename under which the file is saved.
- `md5`: The MD5 checksum of the respective file.

experiment : Experiment
The experiment definition.

filename_format : str
Regular expression which will be matched before trying to load the file. Named groups will
appear in the `fileinfo` dataframe.

filename_format_dtypes : dict[str, type], optional
If named groups are present in the `filename_format`, this makes it possible to cast
specific named groups to a particular datatype.

column_map : dict[str, str]
The keys are the columns to read, the values are the names to which they should be renamed.

custom_read_kwargs : dict[str, Any], optional
If specified, these keyword arguments will be passed to the file reading function.

Examples
--------
Initialize your :py:class:`~pymovements.Dataset` object with the
:py:class:`~pymovements.HBN` definition:

>>> import pymovements as pm
>>>
>>> dataset = pm.Dataset("HBN", path='data/HBN')

Download the dataset resources:

>>> dataset.download()  # doctest: +SKIP

Load the data into memory:

>>> dataset.load()  # doctest: +SKIP
"""

# pylint: disable=similarities
# The PublicDatasetDefinition child classes potentially share code chunks for definitions.

name: str = 'HBN'

mirrors: tuple[str, ...] = (
'https://files.osf.io/v1/resources/qknuv/providers/osfstorage/',
)

resources: tuple[dict[str, str], ...] = (
{
'resource': '651190031e76a453918a9971',
'filename': 'data.zip',
'md5': '2c523e911022ffc0eab700e34e9f7f30',
},
)

experiment: Experiment = Experiment(
screen_width_px=800,
screen_height_px=600,
screen_width_cm=33.8,
screen_height_cm=27.0,
distance_cm=63.5,
origin='center',
sampling_rate=120,
)

filename_format: str = r'{subject_id:12}_{video_id}.csv'

filename_format_dtypes: dict[str, type] = field(
default_factory=lambda: {
'subject_id': str,
'video_id': str,
},
)

trial_columns: list[str] = field(default_factory=lambda: ['video_id'])

time_column: str = 'time'

pixel_columns: list[str] = field(default_factory=lambda: ['x_pix', 'y_pix'])

column_map: dict[str, str] = field(default_factory=lambda: {})

custom_read_kwargs: dict[str, Any] = field(
default_factory=lambda: {
'separator': ',',
'columns': [
'time', 'x_pix', 'y_pix',
],
'dtypes': [
pl.Float64, pl.Float64, pl.Float64,
],
},
)
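
A minimal end-to-end usage sketch for the new definition, shown here for context rather than as part of the diff. It assumes a local data/HBN directory and network access for the download; the fileinfo and gaze attributes on pm.Dataset are assumptions based on the general dataset API and are not shown in this pull request.

# Usage sketch (illustrative): initialize by registered name, download, load.
import pymovements as pm

dataset = pm.Dataset('HBN', path='data/HBN')

# Download and extract the single zip resource listed in `resources`,
# then load the per-trial CSV files into memory.
dataset.download()
dataset.load()

# `fileinfo` should expose the named groups from `filename_format`
# (subject_id, video_id); `gaze` is assumed to hold one frame per file.
print(dataset.fileinfo.head())
print(len(dataset.gaze))
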
2 changes: 1 addition & 1 deletion src/pymovements/datasets/sb_sat.py
@@ -17,7 +17,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""This module provides an interface to the GazeOnFaces dataset."""
"""This module provides an interface to the SB-SAT dataset."""
from __future__ import annotations

from dataclasses import dataclass
2 changes: 2 additions & 0 deletions tests/datasets/datasets_test.py
@@ -32,6 +32,7 @@
pytest.param(pm.datasets.GazeBase, 'GazeBase', id='GazeBase'),
pytest.param(pm.datasets.GazeBaseVR, 'GazeBaseVR', id='GazeBaseVR'),
pytest.param(pm.datasets.GazeOnFaces, 'GazeOnFaces', id='GazeOnFaces'),
pytest.param(pm.datasets.HBN, 'HBN', id='HBN'),
pytest.param(pm.datasets.JuDo1000, 'JuDo1000', id='JuDo1000'),
pytest.param(pm.datasets.SBSAT, 'SBSAT', id='SBSAT'),
],
@@ -49,6 +50,7 @@ def test_public_dataset_registered(definition_class, dataset_name):
pytest.param(pm.datasets.GazeBase, id='GazeBase'),
pytest.param(pm.datasets.GazeBaseVR, id='GazeBaseVR'),
pytest.param(pm.datasets.GazeOnFaces, id='GazeOnFaces'),
pytest.param(pm.datasets.HBN, id='HBN'),
pytest.param(pm.datasets.JuDo1000, id='JuDo1000'),
pytest.param(pm.datasets.SBSAT, id='SBSAT'),
],
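
As a quick complement to the registration checks above, the snippet below inspects the new definition directly. It is a sketch: no-argument instantiation is assumed to work because all fields carry defaults, and the asserted values come straight from the dataclass fields added in this pull request.

# Inspection sketch: all values come from the HBN dataclass fields in this PR.
import pymovements as pm

definition = pm.datasets.HBN()

assert definition.name == 'HBN'
assert definition.time_column == 'time'
assert definition.pixel_columns == ['x_pix', 'y_pix']
assert definition.trial_columns == ['video_id']

# The single downloadable resource declared for this dataset.
print(definition.mirrors[0] + definition.resources[0]['resource'])
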
79 changes: 79 additions & 0 deletions tests/datasets/hbn_test.py
@@ -0,0 +1,79 @@
# Copyright (c) 2023 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Test all functionality in pymovements.dataset.hbn."""
from pathlib import Path

import pytest

import pymovements as pm


@pytest.mark.parametrize(
'init_path, expected_paths',
[
pytest.param(
'/data/set/path',
{
'root': Path('/data/set/path/'),
'dataset': Path('/data/set/path/'),
'download': Path('/data/set/path/downloads'),
},
),
pytest.param(
pm.DatasetPaths(root='/data/set/path'),
{
'root': Path('/data/set/path/'),
'dataset': Path('/data/set/path/HBN'),
'download': Path('/data/set/path/HBN/downloads'),
},
),
pytest.param(
pm.DatasetPaths(root='/data/set/path', dataset='.'),
{
'root': Path('/data/set/path/'),
'dataset': Path('/data/set/path/'),
'download': Path('/data/set/path/downloads'),
},
),
pytest.param(
pm.DatasetPaths(root='/data/set/path', dataset='dataset'),
{
'root': Path('/data/set/path/'),
'dataset': Path('/data/set/path/dataset'),
'download': Path('/data/set/path/dataset/downloads'),
},
),
pytest.param(
pm.DatasetPaths(root='/data/set/path', downloads='custom_downloads'),
{
'root': Path('/data/set/path/'),
'dataset': Path('/data/set/path/HBN'),
'download': Path('/data/set/path/HBN/custom_downloads'),
},
),
],
)
def test_paths(init_path, expected_paths):
dataset = pm.Dataset(pm.datasets.HBN, path=init_path)

assert dataset.paths.root == expected_paths['root']
assert dataset.path == expected_paths['dataset']
assert dataset.paths.dataset == expected_paths['dataset']
assert dataset.paths.downloads == expected_paths['download']