From f89a4f4b25ff869e57a544e8fd9bef0cddb24da6 Mon Sep 17 00:00:00 2001 From: prassepaul Date: Tue, 26 Sep 2023 18:45:03 +0200 Subject: [PATCH] Feature/dataset sbsat (#575) * added from_csv (#504) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix docstring error * docstring error * flake8 issue * cyclic import * added import to docstring * added dataset gaze_on_faces * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * from_ipc * error in path * unused arguments * bug fix * fixed time column in feather file * requested changes * sbset dataset * flake, pyling, mypy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * bib --------- Co-authored-by: prassepaul Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: David R. Reich <43832476+SiQube@users.noreply.github.com> --- docs/source/bibliography.bib | 10 ++ src/pymovements/datasets/__init__.py | 3 + src/pymovements/datasets/gaze_on_faces.py | 2 +- src/pymovements/datasets/sb_sat.py | 151 ++++++++++++++++++++++ tests/datasets/datasets_test.py | 2 + tests/datasets/sbsat_test.py | 79 +++++++++++ 6 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 src/pymovements/datasets/sb_sat.py create mode 100644 tests/datasets/sbsat_test.py diff --git a/docs/source/bibliography.bib b/docs/source/bibliography.bib index f240e2163..0679166a0 100644 --- a/docs/source/bibliography.bib +++ b/docs/source/bibliography.bib @@ -76,3 +76,13 @@ @article{GazeOnFaces year={2016}, publisher={The Association for Research in Vision and Ophthalmology}, } + +@inproceedings{SB-SAT, + title = {Towards predicting reading comprehension from gaze behavior}, + year = {2020}, + booktitle = {Proceedings of the ACM Symposium on Eye Tracking Research and Applications}, + author = {Ahn, Seoyoung and Kelton, Conor and Balasubramanian, 
Aruna and Zelinsky, Greg}, + pages = {1--5}, + publisher = {Association for Computing Machinery}, + address = "Stuttgart, Germany", +} diff --git a/src/pymovements/datasets/__init__.py b/src/pymovements/datasets/__init__.py index 75597101c..b2f0a543e 100644 --- a/src/pymovements/datasets/__init__.py +++ b/src/pymovements/datasets/__init__.py @@ -29,6 +29,7 @@ pymovements.datasets.GazeBaseVR pymovements.datasets.GazeOnFaces pymovements.datasets.JuDo1000 + pymovements.datasets.SBSAT .. rubric:: Example Datasets @@ -44,6 +45,7 @@ from pymovements.datasets.gazebase import GazeBase from pymovements.datasets.gazebasevr import GazeBaseVR from pymovements.datasets.judo1000 import JuDo1000 +from pymovements.datasets.sb_sat import SBSAT from pymovements.datasets.toy_dataset import ToyDataset from pymovements.datasets.toy_dataset_eyelink import ToyDatasetEyeLink @@ -53,6 +55,7 @@ 'GazeBaseVR', 'GazeOnFaces', 'JuDo1000', + 'SBSAT', 'ToyDataset', 'ToyDatasetEyeLink', ] diff --git a/src/pymovements/datasets/gaze_on_faces.py b/src/pymovements/datasets/gaze_on_faces.py index 0a5795dfe..7ed3d125f 100644 --- a/src/pymovements/datasets/gaze_on_faces.py +++ b/src/pymovements/datasets/gaze_on_faces.py @@ -34,7 +34,7 @@ @dataclass @register_dataset class GazeOnFaces(DatasetDefinition): - """GazeBaseVR dataset :cite:p:`GazeOnFaces`. + """GazeOnFaces dataset :cite:p:`GazeOnFaces`. This dataset includes monocular eye tracking data from single participants in a single session. 
Eye movements are recorded at a sampling frequency of 60 Hz diff --git a/src/pymovements/datasets/sb_sat.py b/src/pymovements/datasets/sb_sat.py new file mode 100644 index 000000000..f6e09f968 --- /dev/null +++ b/src/pymovements/datasets/sb_sat.py @@ -0,0 +1,151 @@ +# Copyright (c) 2022-2023 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""This module provides an interface to the SB-SAT dataset.""" +from __future__ import annotations + +from dataclasses import dataclass +from dataclasses import field +from typing import Any + +import polars as pl + +from pymovements.dataset.dataset_definition import DatasetDefinition +from pymovements.dataset.dataset_library import register_dataset +from pymovements.gaze.experiment import Experiment + + +@dataclass +@register_dataset +class SBSAT(DatasetDefinition): + """SB-SAT dataset :cite:p:`SB-SAT`. 
+ + This dataset includes monocular eye tracking data from single participants in a single + session. Eye movements are recorded at a sampling frequency of 1,000 Hz using an EyeLink 1000 + eye tracker and are provided as pixel coordinates. + + The participant is instructed to read texts and answer questions. + + Check the respective paper for details :cite:p:`SB-SAT`. + + Attributes + ---------- + name : str + The name of the dataset. + + mirrors : tuple[str, ...] + A tuple of mirrors of the dataset. Each entry must be of type `str` and end with a '/'. + + resources : tuple[dict[str, str], ...] + A tuple of dataset resources. Each list entry must be a dictionary with the following keys: + - `resource`: The url suffix of the resource. This will be concatenated with the mirror. + - `filename`: The filename under which the file is saved as. + - `md5`: The MD5 checksum of the respective file. + + experiment : Experiment + The experiment definition. + + filename_format : str + Regular expression which will be matched before trying to load the file. Named groups will + appear in the `fileinfo` dataframe. + + filename_format_dtypes : dict[str, type], optional + If named groups are present in the `filename_format`, this makes it possible to cast + specific named groups to a particular datatype. + + column_map : dict[str, str] + The keys are the columns to read, the values are the names to which they should be renamed. + + custom_read_kwargs : dict[str, Any], optional + If specified, these keyword arguments will be passed to the file reading function. 
+ + Examples + -------- + Initialize your :py:class:`~pymovements.PublicDataset` object with the + :py:class:`~pymovements.datasets.SBSAT` definition: + + >>> import pymovements as pm + >>> + >>> dataset = pm.Dataset("SBSAT", path='data/SBSAT') + + Download the dataset resources: + + >>> dataset.download()# doctest: +SKIP + + Load the data into memory: + + >>> dataset.load()# doctest: +SKIP + """ + + # pylint: disable=similarities + # The PublicDatasetDefinition child classes potentially share code chunks for definitions. + + name: str = 'SBSAT' + + mirrors: tuple[str, ...] = ( + 'https://files.de-1.osf.io/v1/resources/cdx69/providers/osfstorage/', + ) + + resources: tuple[dict[str, str], ...] = ( + { + 'resource': '64525979230ea6163c031267/?zip=', + 'filename': 'csvs.zip', + 'md5': '3cf074c93266b723437cf887f948c993', + }, + ) + + experiment: Experiment = Experiment( + screen_width_px=768, + screen_height_px=1024, + screen_width_cm=42.4, + screen_height_cm=44.5, + distance_cm=70, + origin='center', + sampling_rate=1000, + ) + + filename_format: str = r'msd{subject_id:d}.csv' + + filename_format_dtypes: dict[str, type] = field( + default_factory=lambda: { + 'subject_id': int, + }, + ) + + trial_columns: list[str] = field(default_factory=lambda: ['book_name', 'screen_id']) + + time_column: str = 'time' + + pixel_columns: list[str] = field(default_factory=lambda: ['x_left', 'y_left']) + + column_map: dict[str, str] = field(default_factory=lambda: {}) + + custom_read_kwargs: dict[str, Any] = field( + default_factory=lambda: { + 'separator': '\t', + 'columns': [ + 'time', 'book_name', 'screen_id', + 'x_left', 'y_left', 'pupil_left', + ], + 'dtypes': [ + pl.Int64, pl.Utf8, pl.Int64, + pl.Float64, pl.Float64, pl.Float64, + ], + }, + ) diff --git a/tests/datasets/datasets_test.py b/tests/datasets/datasets_test.py index 074002c10..02065e943 100644 --- a/tests/datasets/datasets_test.py +++ b/tests/datasets/datasets_test.py @@ -33,6 +33,7 @@ 
pytest.param(pm.datasets.GazeBaseVR, 'GazeBaseVR', id='GazeBaseVR'), pytest.param(pm.datasets.GazeOnFaces, 'GazeOnFaces', id='GazeOnFaces'), pytest.param(pm.datasets.JuDo1000, 'JuDo1000', id='JuDo1000'), + pytest.param(pm.datasets.SBSAT, 'SBSAT', id='SBSAT'), ], ) def test_public_dataset_registered(definition_class, dataset_name): @@ -49,6 +50,7 @@ def test_public_dataset_registered(definition_class, dataset_name): pytest.param(pm.datasets.GazeBaseVR, id='GazeBaseVR'), pytest.param(pm.datasets.GazeOnFaces, id='GazeOnFaces'), pytest.param(pm.datasets.JuDo1000, id='JuDo1000'), + pytest.param(pm.datasets.SBSAT, id='SBSAT'), ], ) def test_public_dataset_registered_correct_attributes(dataset_definition_class): diff --git a/tests/datasets/sbsat_test.py b/tests/datasets/sbsat_test.py new file mode 100644 index 000000000..3fcd41b93 --- /dev/null +++ b/tests/datasets/sbsat_test.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""Test all functionality in pymovements.datasets.sb_sat.""" +from pathlib import Path + +import pytest + +import pymovements as pm + + +@pytest.mark.parametrize( + 'init_path, expected_paths', + [ + pytest.param( + '/data/set/path', + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/'), + 'download': Path('/data/set/path/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/SBSAT'), + 'download': Path('/data/set/path/SBSAT/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path', dataset='.'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/'), + 'download': Path('/data/set/path/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path', dataset='dataset'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/dataset'), + 'download': Path('/data/set/path/dataset/downloads'), + }, + ), + pytest.param( + pm.DatasetPaths(root='/data/set/path', downloads='custom_downloads'), + { + 'root': Path('/data/set/path/'), + 'dataset': Path('/data/set/path/SBSAT'), + 'download': Path('/data/set/path/SBSAT/custom_downloads'), + }, + ), + ], +) +def test_paths(init_path, expected_paths): + dataset = pm.Dataset(pm.datasets.SBSAT, path=init_path) + + assert dataset.paths.root == expected_paths['root'] + assert dataset.path == expected_paths['dataset'] + assert dataset.paths.dataset == expected_paths['dataset'] + assert dataset.paths.downloads == expected_paths['download']