From 1c6c76912857e9d9e649dc440e8d19013dcede6c Mon Sep 17 00:00:00 2001 From: SiQube Date: Wed, 23 Oct 2024 08:24:58 +0200 Subject: [PATCH 01/28] feat: split_gaze_data into trial --- src/pymovements/dataset/dataset.py | 34 +++++++++++++++++++ tests/unit/dataset/dataset_test.py | 53 ++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/pymovements/dataset/dataset.py b/src/pymovements/dataset/dataset.py index 9c851341..755892fd 100644 --- a/src/pymovements/dataset/dataset.py +++ b/src/pymovements/dataset/dataset.py @@ -231,6 +231,40 @@ def load_precomputed_reading_measures(self) -> None: self.paths, ) + def _split_gaze_data( + self, + by: list[str] | str, + ) -> None: + """Split gaze data into separate GazeDataFrames. + + Parameters + ---------- + by: list[str] | str + Columns to split dataframe by. + """ + if isinstance(by, str): + by = [by] + new_data = [ + ( + GazeDataFrame( + new_frame, + experiment=_frame.experiment, + trial_columns=self.definition.trial_columns, + time_column=self.definition.time_column, + time_unit=self.definition.time_unit, + position_columns=self.definition.position_columns, + velocity_columns=self.definition.velocity_columns, + acceleration_columns=self.definition.acceleration_columns, + distance_column=self.definition.distance_column, + ), + fileinfo_row, + ) + for (_frame, fileinfo_row) in zip(self.gaze, self.fileinfo['gaze'].to_dicts()) + for new_frame in _frame.frame.partition_by(by=by) + ] + self.gaze = [data[0] for data in new_data] + self.fileinfo['gaze'] = pl.concat([pl.from_dict(data[1]) for data in new_data]) + def split_precomputed_events( self, by: list[str] | str, diff --git a/tests/unit/dataset/dataset_test.py b/tests/unit/dataset/dataset_test.py index 7bb14470..17f3e97a 100644 --- a/tests/unit/dataset/dataset_test.py +++ b/tests/unit/dataset/dataset_test.py @@ -146,6 +146,8 @@ def mock_toy( 'y_left_pix': np.zeros(1000), 'x_right_pix': np.zeros(1000), 'y_right_pix': np.zeros(1000), 
+ 'trial_id_1': np.concatenate([np.zeros(500), np.ones(500)]), + 'trial_id_2': ['a'] * 200 + ['b'] * 200 + ['c'] * 600, }, schema={ 'subject_id': pl.Int64, @@ -154,6 +156,8 @@ def mock_toy( 'y_left_pix': pl.Float64, 'x_right_pix': pl.Float64, 'y_right_pix': pl.Float64, + 'trial_id_1': pl.Float64, + 'trial_id_2': pl.Utf8, }, ) pixel_columns = ['x_left_pix', 'y_left_pix', 'x_right_pix', 'y_right_pix'] @@ -169,6 +173,8 @@ def mock_toy( 'y_right_pix': np.zeros(1000), 'x_avg_pix': np.zeros(1000), 'y_avg_pix': np.zeros(1000), + 'trial_id_1': np.concatenate([np.zeros(500), np.ones(500)]), + 'trial_id_2': ['a'] * 200 + ['b'] * 200 + ['c'] * 600, }, schema={ 'subject_id': pl.Int64, @@ -179,6 +185,8 @@ def mock_toy( 'y_right_pix': pl.Float64, 'x_avg_pix': pl.Float64, 'y_avg_pix': pl.Float64, + 'trial_id_1': pl.Float64, + 'trial_id_2': pl.Utf8, }, ) pixel_columns = [ @@ -192,12 +200,16 @@ def mock_toy( 'time': np.arange(1000), 'x_left_pix': np.zeros(1000), 'y_left_pix': np.zeros(1000), + 'trial_id_1': np.concatenate([np.zeros(500), np.ones(500)]), + 'trial_id_2': ['a'] * 200 + ['b'] * 200 + ['c'] * 600, }, schema={ 'subject_id': pl.Int64, 'time': pl.Int64, 'x_left_pix': pl.Float64, 'y_left_pix': pl.Float64, + 'trial_id_1': pl.Float64, + 'trial_id_2': pl.Utf8, }, ) pixel_columns = ['x_left_pix', 'y_left_pix'] @@ -208,12 +220,16 @@ def mock_toy( 'time': np.arange(1000), 'x_right_pix': np.zeros(1000), 'y_right_pix': np.zeros(1000), + 'trial_id_1': np.concatenate([np.zeros(500), np.ones(500)]), + 'trial_id_2': ['a'] * 200 + ['b'] * 200 + ['c'] * 600, }, schema={ 'subject_id': pl.Int64, 'time': pl.Int64, 'x_right_pix': pl.Float64, 'y_right_pix': pl.Float64, + 'trial_id_1': pl.Float64, + 'trial_id_2': pl.Utf8, }, ) pixel_columns = ['x_right_pix', 'y_right_pix'] @@ -224,12 +240,16 @@ def mock_toy( 'time': np.arange(1000), 'x_pix': np.zeros(1000), 'y_pix': np.zeros(1000), + 'trial_id_1': np.concatenate([np.zeros(500), np.ones(500)]), + 'trial_id_2': ['a'] * 200 + ['b'] * 200 + ['c'] 
* 600, }, schema={ 'subject_id': pl.Int64, 'time': pl.Int64, 'x_pix': pl.Float64, 'y_pix': pl.Float64, + 'trial_id_1': pl.Float64, + 'trial_id_2': pl.Utf8, }, ) pixel_columns = ['x_pix', 'y_pix'] @@ -1000,7 +1020,8 @@ def test_detect_events_attribute_error(gaze_dataset_configuration): }, ( "Column 'position' not found. Available columns are: " - "['time', 'subject_id', 'pixel', 'custom_position', 'velocity']" + "['time', 'trial_id_1', 'trial_id_2', 'subject_id', " + "'pixel', 'custom_position', 'velocity']" ), id='no_position', ), @@ -1012,7 +1033,8 @@ def test_detect_events_attribute_error(gaze_dataset_configuration): }, ( "Column 'velocity' not found. Available columns are: " - "['time', 'subject_id', 'pixel', 'position', 'custom_velocity']" + "['time', 'trial_id_1', 'trial_id_2', 'subject_id', " + "'pixel', 'position', 'custom_velocity']" ), id='no_velocity', ), @@ -1930,3 +1952,30 @@ def test_load_split_precomputed_events(precomputed_dataset_configuration, by, ex dataset.load() dataset.split_precomputed_events(by) assert len(dataset.precomputed_events) == expected_len + + +@pytest.mark.parametrize( + ('by', 'expected_len'), + [ + pytest.param( + 'trial_id_1', + 40, + id='subset_int', + ), + pytest.param( + 'trial_id_2', + 60, + id='subset_int', + ), + pytest.param( + ['trial_id_1', 'trial_id_2'], + 80, + id='subset_int', + ), + ], +) +def test_load_split_gaze(gaze_dataset_configuration, by, expected_len): + dataset = pm.Dataset(**gaze_dataset_configuration['init_kwargs']) + dataset.load() + dataset._split_gaze_data(by) + assert len(dataset.gaze) == expected_len From 976695b9b2f08c272a6d07999cf92c0529297632 Mon Sep 17 00:00:00 2001 From: "Daniel G. Krakowczyk" Date: Wed, 23 Oct 2024 14:52:36 +0200 Subject: [PATCH 02/28] docs: Add missing modules to documentation (#866) Some modules where missing from the html documentation. This PR adds these. 
--- docs/source/reference/index.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 3cc37346..f8add28e 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -11,6 +11,9 @@ pymovements.datasets pymovements.events pymovements.gaze + pymovements.measure pymovements.plotting + pymovements.reading_measures + pymovements.stimulus pymovements.synthetic pymovements.utils From 953ade30dc97cc7362fe6faf0f358e686c9f1920 Mon Sep 17 00:00:00 2001 From: "David R. Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 24 Oct 2024 07:11:48 +0200 Subject: [PATCH 03/28] hotfix: check whether public dataset has gaze files (#872) --- .../public_dataset_processing_test.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/integration/public_dataset_processing_test.py b/tests/integration/public_dataset_processing_test.py index 85e06c88..e1d79de3 100644 --- a/tests/integration/public_dataset_processing_test.py +++ b/tests/integration/public_dataset_processing_test.py @@ -39,14 +39,15 @@ def test_public_dataset_processing(dataset_name, tmp_path): dataset.load() # Do some basic transformations. - if 'pixel' in dataset.gaze[0].columns: - dataset.pix2deg() - dataset.pos2vel() - dataset.pos2acc() - - for gaze in dataset.gaze: - assert 'position' in gaze.columns - assert 'velocity' in gaze.columns - assert 'acceleration' in gaze.columns - - shutil.rmtree(dataset_path, ignore_errors=True) + if dataset.definition.has_files['gaze']: + if 'pixel' in dataset.gaze[0].columns: + dataset.pix2deg() + dataset.pos2vel() + dataset.pos2acc() + + for gaze in dataset.gaze: + assert 'position' in gaze.columns + assert 'velocity' in gaze.columns + assert 'acceleration' in gaze.columns + + shutil.rmtree(dataset_path, ignore_errors=True) From b842bdba9b40d40206dff3f2b9ebbaf809ecc784 Mon Sep 17 00:00:00 2001 From: "Daniel G. 
Krakowczyk" Date: Thu, 24 Oct 2024 12:16:11 +0200 Subject: [PATCH 04/28] docs: correctly add EyeTracker class to gaze module (#876) * docs: Add missing EyeTracker class to html docs * the eye tracker class was not correctly integrated somehow --- src/pymovements/gaze/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pymovements/gaze/__init__.py b/src/pymovements/gaze/__init__.py index 7d0b32ea..ca6a39df 100644 --- a/src/pymovements/gaze/__init__.py +++ b/src/pymovements/gaze/__init__.py @@ -26,6 +26,7 @@ :template: class.rst pymovements.gaze.Experiment + pymovements.gaze.EyeTracker pymovements.gaze.Screen pymovements.gaze.GazeDataFrame @@ -77,6 +78,7 @@ from pymovements.gaze import transforms from pymovements.gaze import transforms_numpy from pymovements.gaze.experiment import Experiment +from pymovements.gaze.eyetracker import EyeTracker from pymovements.gaze.gaze_dataframe import GazeDataFrame from pymovements.gaze.integration import from_numpy from pymovements.gaze.integration import from_pandas @@ -88,6 +90,7 @@ __all__ = [ 'Experiment', + 'EyeTracker', 'from_numpy', 'from_pandas', 'GazeDataFrame', From cc1bae15e423bfd75f15fd8f4ea3c575f367b100 Mon Sep 17 00:00:00 2001 From: "David R. 
Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:01:32 +0200 Subject: [PATCH 05/28] feat: add support for .ias files in stimulus.text.from_file() (#858) --- src/pymovements/stimulus/text.py | 2 +- tests/unit/stimulus/text_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pymovements/stimulus/text.py b/src/pymovements/stimulus/text.py index 397924ac..3d184006 100644 --- a/src/pymovements/stimulus/text.py +++ b/src/pymovements/stimulus/text.py @@ -135,7 +135,7 @@ def from_file( if custom_read_kwargs is None: custom_read_kwargs = {} - valid_extensions = {'.csv', '.tsv', '.txt'} + valid_extensions = {'.csv', '.tsv', '.txt', '.ias'} if aoi_path.suffix in valid_extensions: stimulus_df = pl.read_csv( aoi_path, diff --git a/tests/unit/stimulus/text_test.py b/tests/unit/stimulus/text_test.py index 1fece5fc..2cf99033 100644 --- a/tests/unit/stimulus/text_test.py +++ b/tests/unit/stimulus/text_test.py @@ -224,5 +224,5 @@ def test_text_stimulus_unsupported_format(): ) msg, = excinfo.value.args expected = 'unsupported file format ".pickle".Supported formats are: '\ - '[\'.csv\', \'.tsv\', \'.txt\']' + '[\'.csv\', \'.ias\', \'.tsv\', \'.txt\']' assert msg == expected From 54178042bbdb141f5f9dd59084351521749f7f16 Mon Sep 17 00:00:00 2001 From: "David R. 
Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:23:56 +0200 Subject: [PATCH 06/28] dataset: beijing sentence corpus (#857) --- docs/source/bibliography.bib | 9 ++ src/pymovements/datasets/__init__.py | 3 + src/pymovements/datasets/bsc.py | 200 +++++++++++++++++++++++++++ tests/unit/datasets/datasets_test.py | 3 +- 4 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 src/pymovements/datasets/bsc.py diff --git a/docs/source/bibliography.bib b/docs/source/bibliography.bib index 0d9bfc90..01d32dc4 100644 --- a/docs/source/bibliography.bib +++ b/docs/source/bibliography.bib @@ -1,3 +1,12 @@ +@article{BSC, + author={Pan, Jinger and Yan, Ming and Richter, Eike M. and Shu, Hua and Kliegl, Reinhold}, + title={The Beijing Sentence Corpus: A Chinese sentence corpus with eye movement data and predictability norms}, + journal={Behavior Research Methods}, + year={2022}, + volume={54}, + issue={4}, +} + @article{CodeComprehension, author = {Alakmeh, Tarek and Reich, David and J\"{a}ger, Lena and Fritz, Thomas}, title = {Predicting Code Comprehension: A Novel Approach to Align Human Gaze with Code using Deep Neural Networks}, diff --git a/src/pymovements/datasets/__init__.py b/src/pymovements/datasets/__init__.py index 81e45c14..e98f29c9 100644 --- a/src/pymovements/datasets/__init__.py +++ b/src/pymovements/datasets/__init__.py @@ -25,6 +25,7 @@ :toctree: :template: class.rst + pymovements.datasets.BSC pymovements.datasets.CodeComprehension pymovements.datasets.CopCo pymovements.datasets.DIDEC @@ -48,6 +49,7 @@ pymovements.datasets.ToyDataset pymovements.datasets.ToyDatasetEyeLink """ +from pymovements.datasets.bsc import BSC from pymovements.datasets.codecomprehension import CodeComprehension from pymovements.datasets.copco import CopCo from pymovements.datasets.didec import DIDEC @@ -66,6 +68,7 @@ __all__ = [ + 'BSC', 'CodeComprehension', 'CopCo', 'DIDEC', diff --git a/src/pymovements/datasets/bsc.py b/src/pymovements/datasets/bsc.py 
new file mode 100644 index 00000000..c7d1db9c --- /dev/null +++ b/src/pymovements/datasets/bsc.py @@ -0,0 +1,200 @@ +# Copyright (c) 2022-2024 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""Provides a definition for the BSC dataset.""" +from __future__ import annotations + +from dataclasses import dataclass +from dataclasses import field +from typing import Any + +from pymovements.dataset.dataset_definition import DatasetDefinition +from pymovements.dataset.dataset_library import register_dataset +from pymovements.gaze.experiment import Experiment + + +@dataclass +@register_dataset +class BSC(DatasetDefinition): + """BSC dataset :cite:p:`BSC`. + + This dataset includes monocular eye tracking data from a single participant in a single + session. Eye movements are recorded at a sampling frequency of 1,000 Hz using an EyeLink 1000 + eye tracker and precomputed events on aoi level are reported. 
+ + The participant is instructed to read texts and answer questions. + + Check the respective paper for details :cite:p:`BSC`. + + Attributes + ---------- + name: str + The name of the dataset. + + has_files: dict[str, bool] + Indicate whether the dataset contains 'gaze', 'precomputed_events', and + 'precomputed_reading_measures'. + + mirrors: dict[str, tuple[str, ...]] + A tuple of mirrors of the dataset. Each entry must be of type `str` and end with a '/'. + + resources: dict[str, tuple[dict[str, str], ...]] + A tuple of dataset gaze_resources. Each list entry must be a dictionary with the following + keys: + - `resource`: The url suffix of the resource. This will be concatenated with the mirror. + - `filename`: The filename under which the file is saved as. + - `md5`: The MD5 checksum of the respective file. + + extract: dict[str, bool] + Decide whether to extract the data. + + experiment: Experiment + The experiment definition. + + filename_format: dict[str, str] + Regular expression which will be matched before trying to load the file. Namedgroups will + appear in the `fileinfo` dataframe. + + filename_format_schema_overrides: dict[str, dict[str, type]] + If named groups are present in the `filename_format`, this makes it possible to cast + specific named groups to a particular datatype. + + trial_columns: list[str] + The name of the trial columns in the input data frame. If the list is empty or None, + the input data frame is assumed to contain only one trial. If the list is not empty, + the input data frame is assumed to contain multiple trials and the transformation + methods will be applied to each trial separately. + + time_column: str + The name of the timestamp column in the input data frame. This column will be renamed to + ``time``. + + time_unit: str + The unit of the timestamps in the timestamp column in the input data frame. Supported + units are 's' for seconds, 'ms' for milliseconds and 'step' for steps. 
If the unit is + 'step' the experiment definition must be specified. All timestamps will be converted to + milliseconds. + + pixel_columns: list[str] + The name of the pixel position columns in the input data frame. These columns will be + nested into the column ``pixel``. If the list is empty or None, the nested ``pixel`` + column will not be created. + + column_map: dict[str, str] + The keys are the columns to read, the values are the names to which they should be renamed. + + custom_read_kwargs: dict[str, dict[str, Any]] + If specified, these keyword arguments will be passed to the file reading function. + + Examples + -------- + Initialize your :py:class:`~pymovements.PublicDataset` object with the + :py:class:`~pymovements.datasets.BSC` definition: + + >>> import pymovements as pm + >>> + >>> dataset = pm.Dataset("BSC", path='data/BSC') + + Download the dataset resources: + + >>> dataset.download()# doctest: +SKIP + + Load the data into memory: + + >>> dataset.load()# doctest: +SKIP + """ + + # pylint: disable=similarities + # The PublicDatasetDefinition child classes potentially share code chunks for definitions. 
+ + name: str = 'BSC' + + has_files: dict[str, bool] = field( + default_factory=lambda: { + 'gaze': False, + 'precomputed_events': True, + 'precomputed_reading_measures': False, + }, + ) + mirrors: dict[str, tuple[str, ...]] = field( + default_factory=lambda: + { + 'precomputed_events': ( + 'https://osf.io/download/', + ), + }, + ) + resources: dict[str, tuple[dict[str, str], ...]] = field( + default_factory=lambda: + { + 'precomputed_events': ( + { + 'resource': 'xfe4s/', + 'filename': 'BSC.EMD.zip', + 'md5': 'c7118bfe48c91264d69c45d347f11416', + }, + ), + }, + ) + extract: dict[str, bool] = field( + default_factory=lambda: { + 'precomputed_events': True, + }, + ) + + experiment: Experiment = Experiment( + screen_width_px=None, screen_height_px=None, screen_width_cm=None, + screen_height_cm=None, distance_cm=None, origin=None, sampling_rate=1, + ) + + filename_format: dict[str, str] = field( + default_factory=lambda: + { + 'precomputed_events': 'BSC.EMD.txt', + }, + ) + + filename_format_schema_overrides: dict[str, dict[str, type]] = field( + default_factory=lambda: + { + 'precomputed_events': {}, + }, + ) + + trial_columns: list[str] = field( + default_factory=lambda: [ + 'book_name', + 'screen_id', + ], + ) + + time_column: str = 'time' + + time_unit: str = 'ms' + + pixel_columns: list[str] = field(default_factory=lambda: []) + + column_map: dict[str, str] = field(default_factory=lambda: {}) + + custom_read_kwargs: dict[str, dict[str, Any]] = field( + default_factory=lambda: + { + 'precomputed_events': {'separator': '\t'}, + }, + ) diff --git a/tests/unit/datasets/datasets_test.py b/tests/unit/datasets/datasets_test.py index 6a3e02d5..6c6bc309 100644 --- a/tests/unit/datasets/datasets_test.py +++ b/tests/unit/datasets/datasets_test.py @@ -29,8 +29,9 @@ @pytest.mark.parametrize( ('public_dataset', 'dataset_name'), - # XXX: add public dataset in alphabetical order + # please add datasets in alphabetical order [ + pytest.param(pm.datasets.BSC, 'BSC', id='BSC'), 
pytest.param(pm.datasets.CodeComprehension, 'CodeComprehension', id='CodeComprehension'), pytest.param(pm.datasets.CopCo, 'CopCo', id='CopCo'), pytest.param(pm.datasets.DIDEC, 'DIDEC', id='DIDEC'), From 92b49a73ef3a6d6366edccc2d1cc0bddde644b0a Mon Sep 17 00:00:00 2001 From: "David R. Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:38:43 +0200 Subject: [PATCH 07/28] dataset: add InteRead dataset (#862) --- docs/source/bibliography.bib | 24 +++ src/pymovements/datasets/__init__.py | 3 + src/pymovements/datasets/interead.py | 222 +++++++++++++++++++++++++++ tests/unit/datasets/datasets_test.py | 1 + 4 files changed, 250 insertions(+) create mode 100644 src/pymovements/datasets/interead.py diff --git a/docs/source/bibliography.bib b/docs/source/bibliography.bib index 01d32dc4..f223262e 100644 --- a/docs/source/bibliography.bib +++ b/docs/source/bibliography.bib @@ -148,6 +148,30 @@ @article{GazeBase doi = {10.1038/s41597-021-00959-y}, } +@inproceedings{InteRead, + title = "{I}nte{R}ead: An Eye Tracking Dataset of Interrupted Reading", + author = {Zermiani, Francesca and + Dhar, Prajit and + Sood, Ekta and + K{\"o}gel, Fabian and + Bulling, Andreas and + Wirzberger, Maria}, + editor = "Calzolari, Nicoletta and + Kan, Min-Yen and + Hoste, Veronique and + Lenci, Alessandro and + Sakti, Sakriani and + Xue, Nianwen", + booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)", + month = may, + year = "2024", + address = "Torino, Italia", + publisher = "ELRA and ICCL", + url = "https://aclanthology.org/2024.lrec-main.802", + pages = "9154--9169", + abstract = "Eye movements during reading offer a window into cognitive processes and language comprehension, but the scarcity of reading data with interruptions {--} which learners frequently encounter in their everyday learning environments {--} hampers advances in the development of intelligent learning 
technologies. We introduce InteRead {--} a novel 50-participant dataset of gaze data recorded during self-paced reading of real-world text. InteRead further offers fine-grained annotations of interruptions interspersed throughout the text as well as resumption lags incurred by these interruptions. Interruptions were triggered automatically once readers reached predefined target words. We validate our dataset by reporting interdisciplinary analyses on different measures of gaze behavior. In line with prior research, our analyses show that the interruptions as well as word length and word frequency effects significantly impact eye movements during reading. We also explore individual differences within our dataset, shedding light on the potential for tailored educational solutions. InteRead is accessible from our datasets web-page: https://www.ife.uni-stuttgart.de/en/llis/research/datasets/.", + } + @misc{JuDo1000, author = {Makowski, Silvia and Jäger, Lena A. and Prasse, Paul and Scheffer, Tobias}, title = {{JuDo1000} Eye Tracking Data Set}, diff --git a/src/pymovements/datasets/__init__.py b/src/pymovements/datasets/__init__.py index e98f29c9..bd02aedb 100644 --- a/src/pymovements/datasets/__init__.py +++ b/src/pymovements/datasets/__init__.py @@ -35,6 +35,7 @@ pymovements.datasets.GazeGraph pymovements.datasets.GazeOnFaces pymovements.datasets.HBN + pymovements.datasets.InteRead pymovements.datasets.JuDo1000 pymovements.datasets.PoTeC pymovements.datasets.SBSAT @@ -60,6 +61,7 @@ from pymovements.datasets.gazebase import GazeBase from pymovements.datasets.gazebasevr import GazeBaseVR from pymovements.datasets.hbn import HBN +from pymovements.datasets.interead import InteRead from pymovements.datasets.judo1000 import JuDo1000 from pymovements.datasets.potec import PoTeC from pymovements.datasets.sb_sat import SBSAT @@ -79,6 +81,7 @@ 'GazeGraph', 'GazeOnFaces', 'HBN', + 'InteRead', 'JuDo1000', 'PoTeC', 'SBSAT', diff --git a/src/pymovements/datasets/interead.py 
b/src/pymovements/datasets/interead.py new file mode 100644 index 00000000..27d3a940 --- /dev/null +++ b/src/pymovements/datasets/interead.py @@ -0,0 +1,222 @@ +# Copyright (c) 2022-2024 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""Provides a definition for the InteRead dataset.""" +from __future__ import annotations + +from dataclasses import dataclass +from dataclasses import field +from typing import Any + +from pymovements.dataset.dataset_definition import DatasetDefinition +from pymovements.dataset.dataset_library import register_dataset +from pymovements.gaze.experiment import Experiment + + +@dataclass +@register_dataset +class InteRead(DatasetDefinition): + """InteRead dataset :cite:p:`InteRead`. + + This dataset includes monocular eye tracking data in an interrupted reading task. + Automatic interruptions occurred during a reading task and participants continued + reading after the pause. 
+ Eye movements are recorded at a sampling frequency of 1200Hz with video-based eye tracker. + Provided data are raw gaze samples and precomputed event files both in pixel coordinates. + + For more details, check the paper :cite:p:`InteRead`. + + Attributes + ---------- + name: str + The name of the dataset. + + has_files: dict[str, bool] + Indicate whether the dataset contains 'gaze', 'precomputed_events', and + 'precomputed_reading_measures'. + + mirrors: dict[str, tuple[str, ...]] + A tuple of mirrors of the dataset. Each entry must be of type `str` and end with a '/'. + + resources: dict[str, tuple[dict[str, str], ...]] + A tuple of dataset gaze_resources. Each list entry must be a dictionary with the following + keys: + - `resource`: The url suffix of the resource. This will be concatenated with the mirror. + - `filename`: The filename under which the file is saved as. + - `md5`: The MD5 checksum of the respective file. + + extract: dict[str, bool] + Decide whether to extract the data. + + experiment: Experiment + The experiment definition. + + filename_format: dict[str, str] + Regular expression which will be matched before trying to load the file. Namedgroups will + appear in the `fileinfo` dataframe. + + filename_format_schema_overrides: dict[str, dict[str, type]] + If named groups are present in the `filename_format`, this makes it possible to cast + specific named groups to a particular datatype. + + trial_columns: list[str] + The name of the trial columns in the input data frame. If the list is empty or None, + the input data frame is assumed to contain only one trial. If the list is not empty, + the input data frame is assumed to contain multiple trials and the transformation + methods will be applied to each trial separately. + + time_column: str + The name of the timestamp column in the input data frame. This column will be renamed to + ``time``. + + time_unit: str + The unit of the timestamps in the timestamp column in the input data frame. 
Supported + units are 's' for seconds, 'ms' for milliseconds and 'step' for steps. If the unit is + 'step' the experiment definition must be specified. All timestamps will be converted to + milliseconds. + + pixel_columns: list[str] + The name of the pixel position columns in the input data frame. These columns will be + nested into the column ``pixel``. If the list is empty or None, the nested ``pixel`` + column will not be created. + + column_map: dict[str, str] + The keys are the columns to read, the values are the names to which they should be renamed. + + custom_read_kwargs: dict[str, dict[str, Any]] + If specified, these keyword arguments will be passed to the file reading function. + + + Examples + -------- + Initialize your :py:class:`~pymovements.PublicDataset` object with the + :py:class:`~pymovements.InteRead` definition: + + >>> import pymovements as pm + >>> + >>> dataset = pm.Dataset("InteRead", path='data/InteRead') + + Download the dataset resources: + + >>> dataset.download()# doctest: +SKIP + + Load the data into memory: + + >>> dataset.load()# doctest: +SKIP + """ + + # pylint: disable=similarities + # The PublicDatasetDefinition child classes potentially share code chunks for definitions. 
+ + name: str = 'InteRead' + + has_files: dict[str, bool] = field( + default_factory=lambda: { + 'gaze': True, + 'precomputed_events': True, + 'precomputed_reading_measures': False, + }, + ) + mirrors: dict[str, tuple[str, ...]] = field( + default_factory=lambda: { + 'gaze': ( + 'https://osf.io/download/', + ), + 'precomputed_events': ( + 'https://osf.io/download/', + ), + }, + ) + + resources: dict[str, tuple[dict[str, str], ...]] = field( + default_factory=lambda: { + 'gaze': ( + { + 'resource': '6ju3x/', + 'filename': 'resampled_gaze.csv.zip', + 'md5': '06b2cdff1827086fa125a703ee9d4324', + }, + ), + 'precomputed_events': ( + { + 'resource': '85ckh/', + 'filename': 'resumption_fixation.csv', + 'md5': '44edb7c58318ad76af1fa6f1bc1f1ceb', + }, + ), + }, + ) + extract: dict[str, bool] = field( + default_factory=lambda: { + 'gaze': True, + 'precomputed_events': False, + }, + ) + + experiment: Experiment = Experiment( + screen_width_px=1920, + screen_height_px=1080, + screen_width_cm=52.8, + screen_height_cm=29.7, + distance_cm=57, + origin='center', + sampling_rate=1200, + ) + + filename_format: dict[str, str] = field( + default_factory=lambda: { + 'gaze': r'resampled_gaze.csv', + 'precomputed_events': r'resumption_fixation.csv', + }, + ) + + filename_format_schema_overrides: dict[str, dict[str, type]] = field( + default_factory=lambda: { + 'gaze': {}, + 'precomputed_events': {}, + }, + ) + + trial_columns: list[str] = field( + default_factory=lambda: [ + 'participant_id', + 'page_id', + 'interruption_state', + ], + ) + + time_column: str = '' + + time_unit: str = 'ms' + + pixel_columns: list[str] = field( + default_factory=lambda: [ + 'x', + 'y', + ], + ) + + column_map: dict[str, str] = field(default_factory=lambda: {}) + + custom_read_kwargs: dict[str, dict[str, Any]] = field( + default_factory=lambda: { + 'gaze': {}, + 'precomputed_events': {}, + }, + ) diff --git a/tests/unit/datasets/datasets_test.py b/tests/unit/datasets/datasets_test.py index 
6c6bc309..35fd3542 100644 --- a/tests/unit/datasets/datasets_test.py +++ b/tests/unit/datasets/datasets_test.py @@ -41,6 +41,7 @@ pytest.param(pm.datasets.GazeBaseVR, 'GazeBaseVR', id='GazeBaseVR'), pytest.param(pm.datasets.GazeOnFaces, 'GazeOnFaces', id='GazeOnFaces'), pytest.param(pm.datasets.HBN, 'HBN', id='HBN'), + pytest.param(pm.datasets.InteRead, 'InteRead', id='InteRead'), pytest.param(pm.datasets.JuDo1000, 'JuDo1000', id='JuDo1000'), pytest.param(pm.datasets.PoTeC, 'PoTeC', id='PoTeC'), pytest.param(pm.datasets.SBSAT, 'SBSAT', id='SBSAT'), From f0b69a91b1bf6289c3b88ad082c79501a2cbc1df Mon Sep 17 00:00:00 2001 From: "David R. Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:48:04 +0200 Subject: [PATCH 08/28] fix: copy event resource files instead of moving them to events directory (#863) --- src/pymovements/dataset/dataset_download.py | 2 +- tests/unit/dataset/dataset_download_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pymovements/dataset/dataset_download.py b/src/pymovements/dataset/dataset_download.py index 5131bfc9..752dbdd0 100644 --- a/src/pymovements/dataset/dataset_download.py +++ b/src/pymovements/dataset/dataset_download.py @@ -268,7 +268,7 @@ def extract_dataset( verbose=verbose, ) else: - shutil.move(source_path, destination_path / resource['filename']) + shutil.copy(source_path, destination_path / resource['filename']) if definition.has_files['precomputed_reading_measures']: paths.precomputed_reading_measures.mkdir(parents=True, exist_ok=True) diff --git a/tests/unit/dataset/dataset_download_test.py b/tests/unit/dataset/dataset_download_test.py index 8ba7f5b6..2f2586a6 100644 --- a/tests/unit/dataset/dataset_download_test.py +++ b/tests/unit/dataset/dataset_download_test.py @@ -1205,7 +1205,7 @@ class PrecomputedResourcesDefinition(pm.DatasetDefinition): pm.dataset.dataset_download.extract_dataset( PrecomputedResourcesDefinition(), - pm.DatasetPaths(root='tests/files/', 
downloads='.', precomputed_events='.'), + pm.DatasetPaths(root='tests/files/', downloads='.'), ) From e6a9ced5b525940ab00cf81daa5c4c33518dc08e Mon Sep 17 00:00:00 2001 From: "David R. Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:23:42 +0200 Subject: [PATCH 09/28] hotfix: CopCo dataset precomputed eventsloading (#873) --- src/pymovements/datasets/copco.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pymovements/datasets/copco.py b/src/pymovements/datasets/copco.py index 292d54d8..1edff805 100644 --- a/src/pymovements/datasets/copco.py +++ b/src/pymovements/datasets/copco.py @@ -223,6 +223,7 @@ class CopCo(DatasetDefinition): 'infer_schema_length': 100000, 'truncate_ragged_lines': True, 'decimal_comma': True, + 'quote_char': None, }, 'precomputed_reading_measures': {}, }, From 1b8c4bdd1f17252436ca77d0079dd79391f96a8e Mon Sep 17 00:00:00 2001 From: "Daniel G. Krakowczyk" Date: Fri, 25 Oct 2024 06:02:16 +0200 Subject: [PATCH 10/28] ci: ignore too-many-public-methods (#882) --- pylintrc | 1 + 1 file changed, 1 insertion(+) diff --git a/pylintrc b/pylintrc index 39a7c976..d04bbd47 100644 --- a/pylintrc +++ b/pylintrc @@ -104,6 +104,7 @@ disable= too-many-instance-attributes, too-many-locals, too-many-positional-arguments, + too-many-public-methods, [REPORTS] From 69ef837ef2eba0bb1ee820faae1da2046fa17364 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 13:52:26 +0100 Subject: [PATCH 11/28] ci: pre-commit autoupdate (#889) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.18.0 → v3.19.0](https://github.com/asottile/pyupgrade/compare/v3.18.0...v3.19.0) - [github.com/pre-commit/mirrors-mypy: v1.12.1 → v1.13.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.12.1...v1.13.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- 
.pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b6bf9a68..60b0e965 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: hooks: - id: add-trailing-comma - repo: https://github.com/asottile/pyupgrade - rev: v3.18.0 + rev: v3.19.0 hooks: - id: pyupgrade args: [--py39-plus] @@ -68,7 +68,7 @@ repos: - id: nbqa-pyupgrade args: ["--py39-plus"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.12.1 + rev: v1.13.0 hooks: - id: mypy additional_dependencies: [pandas-stubs, types-tqdm] From cfbce95f0ee718f61af3b9104c827e0f89d0bc7f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:47:33 +0100 Subject: [PATCH 12/28] ci: pre-commit autoupdate (#890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/kynan/nbstripout: 0.7.1 → 0.8.0](https://github.com/kynan/nbstripout/compare/0.7.1...0.8.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 60b0e965..70dbde98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: hooks: - id: rm-unneeded-f-str - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.0 hooks: - id: nbstripout - repo: https://github.com/Lucas-C/pre-commit-hooks From 47e734d1a3f3505ca0d0f33ea0e36defe12fefe8 Mon Sep 17 00:00:00 2001 From: "David R. 
Reich" <43832476+SiQube@users.noreply.github.com> Date: Thu, 7 Nov 2024 18:02:31 +0100 Subject: [PATCH 13/28] build: add support for python 3.13 (#845) --- .github/workflows/tests.yml | 5 +++++ pyproject.toml | 1 + tox.ini | 1 + 3 files changed, 7 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 70478917..6a4fa3b8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,6 +24,7 @@ jobs: - py310 - py311 - py312 + - py313 exclude: - os: macos-latest tox_env: py39 @@ -33,6 +34,8 @@ jobs: tox_env: py311 - os: macos-13 tox_env: py312 + - os: macos-13 + tox_env: py313 include: - tox_env: py39 python: "3.9" @@ -42,6 +45,8 @@ jobs: python: "3.11" - tox_env: py312 python: "3.12" + - tox_env: py313 + python: "3.13" steps: - uses: actions/checkout@v4 with: diff --git a/pyproject.toml b/pyproject.toml index c4d9ae1a..55df656a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", "Typing :: Typed" ] diff --git a/tox.ini b/tox.ini index 26403795..bef1de77 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ envlist = py310 py311 py312 + py313 build docs coverage From 166b07608fdba52e4afdd76ea35c5e8398983913 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Nov 2024 23:51:50 +0100 Subject: [PATCH 14/28] build: update nbsphinx requirement from <0.9.5,>=0.8.8 to >=0.8.8,<0.9.6 (#777) Updates the requirements on [nbsphinx](https://github.com/spatialaudio/nbsphinx) to permit the latest version. 
- [Release notes](https://github.com/spatialaudio/nbsphinx/releases) - [Changelog](https://github.com/spatialaudio/nbsphinx/blob/master/NEWS.rst) - [Commits](https://github.com/spatialaudio/nbsphinx/compare/0.8.8...0.9.5) --- updated-dependencies: - dependency-name: nbsphinx dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 55df656a..5d81c753 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dynamic = ["version"] docs = [ "ipykernel>=6.13.0", "nbconvert>=7.0.0,<7.17", - "nbsphinx>=0.8.8,<0.9.5", + "nbsphinx>=0.8.8,<0.9.6", "pandoc", "pybtex", "pydata-sphinx-theme>=0.12", From 495e5d968fd1bbeeff04dbd5de3441734c333935 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 18:59:00 +0100 Subject: [PATCH 15/28] ci: pre-commit autoupdate (#896) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/nbQA-dev/nbQA: 1.8.7 → 1.9.1](https://github.com/nbQA-dev/nbQA/compare/1.8.7...1.9.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 70dbde98..24f6d0ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -58,7 +58,7 @@ repos: - id: pydoclint args: ["--config=pyproject.toml"] - repo: https://github.com/nbQA-dev/nbQA - rev: 1.8.7 + rev: 1.9.1 hooks: - id: nbqa-autopep8 - id: nbqa-flake8 From b691e6d7d23d1a50b77d1de158adecce65366c42 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 22:49:37 +0000 Subject: [PATCH 16/28] build: update setuptools-git-versioning requirement from <2 to <3 (#895) Updates the 
requirements on [setuptools-git-versioning](https://github.com/dolfinus/setuptools-git-versioning) to permit the latest version. - [Release notes](https://github.com/dolfinus/setuptools-git-versioning/releases) - [Changelog](https://github.com/dolfinus/setuptools-git-versioning/blob/master/CHANGELOG.rst) - [Commits](https://github.com/dolfinus/setuptools-git-versioning/compare/v0.0.1...v2.0.0) --- updated-dependencies: - dependency-name: setuptools-git-versioning dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5d81c753..bd6a928f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] requires = [ "setuptools>=61.0.0", - "setuptools-git-versioning<2", + "setuptools-git-versioning<3", "versioneer[toml]>=0.29,<1.0", "wheel" ] From 88113c86cb15697c3ef446309861785a6423867f Mon Sep 17 00:00:00 2001 From: "David R. 
Reich" <43832476+SiQube@users.noreply.github.com> Date: Wed, 13 Nov 2024 17:11:10 +0100 Subject: [PATCH 17/28] hotfix: download link fakenewsperception dataset (#897) Co-authored-by: aarbeikop <119419892+aarbeikop@users.noreply.github.com> --- src/pymovements/datasets/fakenews.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pymovements/datasets/fakenews.py b/src/pymovements/datasets/fakenews.py index 64c2cf9f..0d8daa5a 100644 --- a/src/pymovements/datasets/fakenews.py +++ b/src/pymovements/datasets/fakenews.py @@ -102,7 +102,7 @@ class FakeNewsPerception(DatasetDefinition): mirrors: dict[str, tuple[str, ...]] = field( default_factory=lambda: { - 'precomputed_events': ('https://doi.org/10.7910/DVN/C1UD2A',), + 'precomputed_events': ('https://dataverse.harvard.edu/',), }, ) @@ -132,7 +132,7 @@ class FakeNewsPerception(DatasetDefinition): filename_format: dict[str, str] = field( default_factory=lambda: { - 'precomputed_events': r'P{subject_id:d}_{session_id:d}_{truth_value:s}.csv', + 'precomputed_events': r'P{subject_id:d}_S{session_id:d}_{truth_value:s}.csv', }, ) From 21fd0d21191051e64426cf1e0d1af1a994e818d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20S=C3=A4uberli?= <38892775+saeub@users.noreply.github.com> Date: Thu, 14 Nov 2024 21:41:26 +0100 Subject: [PATCH 18/28] feat: Store metadata from ASC in experiment metadata (#884) * updated io file * updated test file * Add tests for metadata parsing from ASC file * Squashed commit of the following: commit 14d047cff314b4ef1f8b8bfe428286c881a86aea Author: Faizan Ansari Date: Thu Oct 24 22:02:30 2024 +0200 Remove files from remote directory commit aa78078f46b1731604691d0332c8d3fea747ec14 Author: Faizan Ansari Date: Thu Oct 24 21:53:35 2024 +0200 updated code commit cae54cccd75800dcf8a903de5ca130c7e7c9d724 Author: Faizan Ansari Date: Thu Oct 24 15:40:16 2024 +0200 changes in io.py file * Fix formatting * Fix indentation * Fix circular imports * 2 test passed * Fix attribute name * 
Refactor metadata checks, add tests * Fix f-strings * Fix tests * Address comments * Improve test coverage * Add comment about screen resolution * Fix metadata conflict check * Fix test coverage * Fix type hint * Trigger codecov * rebase me * Upgrade codecov action * Revert codecov action upgrade --------- Co-authored-by: Faizan Ansari Co-authored-by: SiQube --- src/pymovements/__init__.py | 2 + src/pymovements/dataset/dataset_files.py | 2 +- .../datasets/toy_dataset_eyelink.py | 4 +- src/pymovements/gaze/gaze_dataframe.py | 3 + src/pymovements/gaze/io.py | 85 ++++++++++- tests/functional/gaze_file_processing_test.py | 4 +- tests/unit/dataset/dataset_files_test.py | 2 +- tests/unit/gaze/io/asc_test.py | 132 +++++++++++++----- tests/unit/utils/parsing_test.py | 36 +++++ 9 files changed, 223 insertions(+), 47 deletions(-) diff --git a/src/pymovements/__init__.py b/src/pymovements/__init__.py index 8cf4f938..cb204c48 100644 --- a/src/pymovements/__init__.py +++ b/src/pymovements/__init__.py @@ -38,6 +38,7 @@ from pymovements.events import EventGazeProcessor from pymovements.events import EventProcessor from pymovements.gaze import Experiment +from pymovements.gaze import EyeTracker from pymovements.gaze import GazeDataFrame from pymovements.gaze import Screen from pymovements.measure import register_sample_measure @@ -60,6 +61,7 @@ 'gaze', 'Experiment', + 'EyeTracker', 'Screen', 'GazeDataFrame', diff --git a/src/pymovements/dataset/dataset_files.py b/src/pymovements/dataset/dataset_files.py index 1553fddb..dfc4c40e 100644 --- a/src/pymovements/dataset/dataset_files.py +++ b/src/pymovements/dataset/dataset_files.py @@ -377,7 +377,7 @@ def load_gaze_file( column_schema_overrides=definition.filename_format_schema_overrides['gaze'], ) elif filepath.suffix == '.asc': - gaze_df, _ = from_asc( + gaze_df = from_asc( filepath, experiment=definition.experiment, add_columns=add_columns, diff --git a/src/pymovements/datasets/toy_dataset_eyelink.py 
b/src/pymovements/datasets/toy_dataset_eyelink.py index 4d70501d..488fb5b7 100644 --- a/src/pymovements/datasets/toy_dataset_eyelink.py +++ b/src/pymovements/datasets/toy_dataset_eyelink.py @@ -166,8 +166,8 @@ class ToyDatasetEyeLink(DatasetDefinition): origin='upper left', eyetracker=EyeTracker( sampling_rate=1000.0, - left=False, - right=True, + left=True, + right=False, model='EyeLink Portable Duo', vendor='EyeLink', ), diff --git a/src/pymovements/gaze/gaze_dataframe.py b/src/pymovements/gaze/gaze_dataframe.py index 10f3e8bc..7d81c962 100644 --- a/src/pymovements/gaze/gaze_dataframe.py +++ b/src/pymovements/gaze/gaze_dataframe.py @@ -283,6 +283,9 @@ def __init__( else: self.events = events.copy() + # Remove this attribute once #893 is fixed + self._metadata: dict[str, Any] | None = None + def apply( self, function: str, diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py index 18868e3d..e68d46fd 100644 --- a/src/pymovements/gaze/io.py +++ b/src/pymovements/gaze/io.py @@ -25,7 +25,8 @@ import polars as pl -from pymovements.gaze import Experiment # pylint: disable=cyclic-import +from pymovements.gaze.experiment import Experiment +from pymovements.gaze.eyetracker import EyeTracker from pymovements.gaze.gaze_dataframe import GazeDataFrame # pylint: disable=cyclic-import from pymovements.utils.parsing import parse_eyelink @@ -277,7 +278,7 @@ def from_asc( experiment: Experiment | None = None, add_columns: dict[str, str] | None = None, column_schema_overrides: dict[str, Any] | None = None, -) -> tuple[GazeDataFrame, dict[str, Any]]: +) -> GazeDataFrame: """Initialize a :py:class:`pymovements.gaze.gaze_dataframe.GazeDataFrame`. Parameters @@ -303,8 +304,8 @@ def from_asc( Returns ------- - tuple[GazeDataFrame, dict[str, Any]] - The gaze data frame and a metadata dictionary read from the asc file. + GazeDataFrame + The gaze data frame read from the asc file. 
Examples -------- @@ -312,7 +313,7 @@ def from_asc( We can then load the data into a ``GazeDataFrame``: >>> from pymovements.gaze.io import from_asc - >>> gaze, metadata = from_asc(file='tests/files/eyelink_monocular_example.asc') + >>> gaze = from_asc(file='tests/files/eyelink_monocular_example.asc') >>> gaze.frame shape: (16, 3) ┌─────────┬───────┬────────────────┐ @@ -332,7 +333,7 @@ def from_asc( │ 2339290 ┆ 618.0 ┆ [637.6, 531.4] │ │ 2339291 ┆ 618.0 ┆ [637.3, 531.2] │ └─────────┴───────┴────────────────┘ - >>> metadata['sampling_rate'] + >>> gaze.experiment.eyetracker.sampling_rate 1000.0 """ if isinstance(patterns, str): @@ -360,6 +361,75 @@ def from_asc( for fileinfo_key, fileinfo_dtype in column_schema_overrides.items() ]) + if experiment is None: + experiment = Experiment(sampling_rate=metadata['sampling_rate']) + if experiment.eyetracker is None: + experiment.eyetracker = EyeTracker() + + # Compare metadata from experiment definition with metadata from ASC file. + # Fill in missing metadata in experiment definition and raise an error if there are conflicts + issues = [] + + # Screen resolution (assuming that width and height will always be missing or set together) + experiment_resolution = (experiment.screen.width_px, experiment.screen.height_px) + if experiment_resolution == (None, None): + experiment.screen.width_px, experiment.screen.height_px = metadata['resolution'] + elif experiment_resolution != metadata['resolution']: + issues.append(f"Screen resolution: {experiment_resolution} vs. {metadata['resolution']}") + + # Sampling rate + if experiment.eyetracker.sampling_rate is None: + experiment.eyetracker.sampling_rate = metadata['sampling_rate'] + elif experiment.eyetracker.sampling_rate != metadata['sampling_rate']: + issues.append( + f"Sampling rate: {experiment.eyetracker.sampling_rate} vs. 
{metadata['sampling_rate']}", + ) + + # Tracked eye + asc_left_eye = 'L' in metadata['tracked_eye'] + asc_right_eye = 'R' in metadata['tracked_eye'] + if experiment.eyetracker.left is None: + experiment.eyetracker.left = asc_left_eye + elif experiment.eyetracker.left != asc_left_eye: + issues.append(f"Left eye tracked: {experiment.eyetracker.left} vs. {asc_left_eye}") + if experiment.eyetracker.right is None: + experiment.eyetracker.right = asc_right_eye + elif experiment.eyetracker.right != asc_right_eye: + issues.append(f"Right eye tracked: {experiment.eyetracker.right} vs. {asc_right_eye}") + + # Mount configuration + if experiment.eyetracker.mount is None: + experiment.eyetracker.mount = metadata['mount_configuration']['mount_type'] + elif experiment.eyetracker.mount != metadata['mount_configuration']['mount_type']: + issues.append(f"Mount configuration: {experiment.eyetracker.mount} vs. " + f"{metadata['mount_configuration']['mount_type']}") + + # Eye tracker vendor + asc_vendor = 'EyeLink' if 'EyeLink' in metadata['model'] else None + if experiment.eyetracker.vendor is None: + experiment.eyetracker.vendor = asc_vendor + elif experiment.eyetracker.vendor != asc_vendor: + issues.append(f"Eye tracker vendor: {experiment.eyetracker.vendor} vs. {asc_vendor}") + + # Eye tracker model + if experiment.eyetracker.model is None: + experiment.eyetracker.model = metadata['model'] + elif experiment.eyetracker.model != metadata['model']: + issues.append(f"Eye tracker model: {experiment.eyetracker.model} vs. {metadata['model']}") + + # Eye tracker software version + if experiment.eyetracker.version is None: + experiment.eyetracker.version = metadata['version_number'] + elif experiment.eyetracker.version != metadata['version_number']: + issues.append(f"Eye tracker software version: {experiment.eyetracker.version} vs. 
" + f"{metadata['version_number']}") + + if issues: + raise ValueError( + 'Experiment metadata does not match the metadata in the ASC file:\n' + + '\n'.join(f'- {issue}' for issue in issues), + ) + # Create gaze data frame. gaze_df = GazeDataFrame( gaze_data, @@ -368,7 +438,8 @@ def from_asc( time_unit='ms', pixel_columns=['x_pix', 'y_pix'], ) - return gaze_df, metadata + gaze_df._metadata = metadata # pylint: disable=protected-access + return gaze_df def from_ipc( diff --git a/tests/functional/gaze_file_processing_test.py b/tests/functional/gaze_file_processing_test.py index 7a0e8449..88e57c54 100644 --- a/tests/functional/gaze_file_processing_test.py +++ b/tests/functional/gaze_file_processing_test.py @@ -71,7 +71,7 @@ def fixture_gaze_init_kwargs(request): }, 'eyelink_monocular': { 'file': 'tests/files/eyelink_monocular_example.asc', - 'experiment': pm.datasets.ToyDatasetEyeLink().experiment, + 'experiment': pm.Experiment(1280, 1024, 38, 30, 60, 'upper left', 1000), }, 'didec': { 'file': 'tests/files/didec_example.txt', @@ -157,7 +157,7 @@ def test_gaze_file_processing(gaze_from_kwargs): elif file_extension in {'.feather', '.ipc'}: gaze = pm.gaze.from_ipc(**gaze_from_kwargs) elif file_extension == '.asc': - gaze, _ = pm.gaze.from_asc(**gaze_from_kwargs) + gaze = pm.gaze.from_asc(**gaze_from_kwargs) assert gaze is not None diff --git a/tests/unit/dataset/dataset_files_test.py b/tests/unit/dataset/dataset_files_test.py index 80af569a..61101ea9 100644 --- a/tests/unit/dataset/dataset_files_test.py +++ b/tests/unit/dataset/dataset_files_test.py @@ -206,7 +206,7 @@ def test_load_eyelink_file(tmp_path, read_kwargs): filepath, fileinfo_row={}, definition=DatasetDefinition( - experiment=pm.Experiment(1024, 768, 38, 30, None, 'center', 100), + experiment=pm.Experiment(1280, 1024, 38, 30, None, 'center', 100), filename_format_schema_overrides={'gaze': {}, 'precomputed_events': {}}, ), custom_read_kwargs=read_kwargs, diff --git a/tests/unit/gaze/io/asc_test.py 
b/tests/unit/gaze/io/asc_test.py index 40afb2b6..7c2af7f4 100644 --- a/tests/unit/gaze/io/asc_test.py +++ b/tests/unit/gaze/io/asc_test.py @@ -131,64 +131,128 @@ ], ) def test_from_asc_has_shape_and_schema(kwargs, expected_frame): - gaze, _ = pm.gaze.from_asc(**kwargs) + gaze = pm.gaze.from_asc(**kwargs) assert_frame_equal(gaze.frame, expected_frame, check_column_order=False) @pytest.mark.parametrize( - ('kwargs', 'expected_metadata'), + ('kwargs', 'exception', 'message'), [ pytest.param( { 'file': 'tests/files/eyelink_monocular_example.asc', - 'metadata_patterns': [ - {'pattern': r'!V TRIAL_VAR SUBJECT_ID (?P<subject_id>-?\d+)'}, - r'!V TRIAL_VAR STIMULUS_COMBINATION_ID (?P<stimulus_combination_id>.+)', - ], + 'patterns': 'foobar', }, + ValueError, + "unknown pattern key 'foobar'. Supported keys are: eyelink", + id='unknown_pattern', + ), + ], +) +def test_from_asc_raises_exception(kwargs, exception, message): + with pytest.raises(exception) as excinfo: + pm.gaze.from_asc(**kwargs) + + msg, = excinfo.value.args + assert msg == message + + +@pytest.mark.parametrize( + ('file', 'sampling_rate'), + [ + pytest.param('tests/files/eyelink_monocular_example.asc', 1000.0, id='1khz'), + pytest.param('tests/files/eyelink_monocular_2khz_example.asc', 2000.0, id='2khz'), + ], +) +def test_from_asc_fills_in_experiment_metadata(file, sampling_rate): + gaze = pm.gaze.from_asc(file, experiment=None) + assert gaze.experiment.screen.width_px == 1280 + assert gaze.experiment.screen.height_px == 1024 + assert gaze.experiment.eyetracker.sampling_rate == sampling_rate + assert gaze.experiment.eyetracker.left is True + assert gaze.experiment.eyetracker.right is False + assert gaze.experiment.eyetracker.model == 'EyeLink Portable Duo' + assert gaze.experiment.eyetracker.version == '6.12' + assert gaze.experiment.eyetracker.vendor == 'EyeLink' + assert gaze.experiment.eyetracker.mount == 'Desktop' + + +@pytest.mark.parametrize( + ('experiment_kwargs', 'issues'), + [ + pytest.param( { - 'subject_id': '-1', -
'stimulus_combination_id': 'start', + 'screen_width_px': 1920, + 'screen_height_px': 1080, + 'sampling_rate': 1000, }, - id='eyelink_asc_metadata_patterns', + ['Screen resolution: (1920, 1080) vs. (1280, 1024)'], + id='screen_resolution', ), pytest.param( { - 'file': 'tests/files/eyelink_monocular_example.asc', - 'metadata_patterns': [r'inexistent pattern (?P<value>-?\d+)'], + 'eyetracker': pm.EyeTracker(sampling_rate=500), }, + ['Sampling rate: 500 vs. 1000.0'], + id='eyetracker_sampling_rate', + ), + pytest.param( { - 'value': None, + 'eyetracker': pm.EyeTracker( + left=False, + right=True, + sampling_rate=1000, + mount='Desktop', + ), }, - id='eyelink_asc_metadata_pattern_not_found', + [ + 'Left eye tracked: False vs. True', + 'Right eye tracked: True vs. False', + ], + id='eyetracker_tracked_eye', ), - ], -) -def test_from_asc_metadata_patterns(kwargs, expected_metadata): - _, metadata = pm.gaze.from_asc(**kwargs) - - for key, value in expected_metadata.items(): - assert metadata[key] == value - - -@pytest.mark.parametrize( - ('kwargs', 'exception', 'message'), - [ pytest.param( { - 'file': 'tests/files/eyelink_monocular_example.asc', - 'patterns': 'foobar', + 'eyetracker': pm.EyeTracker( + vendor='Tobii', + model='Tobii Pro Spectrum', + version='1.0', + sampling_rate=1000, + left=True, + right=False, + ), }, - ValueError, - "unknown pattern key 'foobar'. Supported keys are: eyelink", - id='unknown_pattern', + [ + 'Eye tracker vendor: Tobii vs. EyeLink', + 'Eye tracker model: Tobii Pro Spectrum vs. EyeLink Portable Duo', + 'Eye tracker software version: 1.0 vs. 6.12', + ], + id='eyetracker_vendor_model_version', + ), + pytest.param( + { + 'eyetracker': pm.EyeTracker( + mount='Remote', + sampling_rate=1000, + vendor='EyeLink', + model='EyeLink Portable Duo', + version='6.12', + ), + }, + ['Mount configuration: Remote vs.
Desktop'], + id='eyetracker_mount', ), ], ) -def test_from_asc_raises_exception(kwargs, exception, message): - with pytest.raises(exception) as excinfo: - pm.gaze.from_asc(**kwargs) +def test_from_asc_detects_mismatches_in_experiment_metadata(experiment_kwargs, issues): + with pytest.raises(ValueError) as excinfo: + pm.gaze.from_asc( + 'tests/files/eyelink_monocular_example.asc', + experiment=pm.Experiment(**experiment_kwargs), + ) msg, = excinfo.value.args - assert msg == message + expected_msg = 'Experiment metadata does not match the metadata in the ASC file:\n' + expected_msg += '\n'.join(f'- {issue}' for issue in issues) + assert msg == expected_msg diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index f33e89fd..13dc2dd3 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -206,6 +206,42 @@ def test_parse_eyelink(tmp_path): assert metadata == EXPECTED_METADATA +@pytest.mark.parametrize( + ('kwargs', 'expected_metadata'), + [ + pytest.param( + { + 'filepath': 'tests/files/eyelink_monocular_example.asc', + 'metadata_patterns': [ + {'pattern': r'!V TRIAL_VAR SUBJECT_ID (?P<subject_id>-?\d+)'}, + r'!V TRIAL_VAR STIMULUS_COMBINATION_ID (?P<stimulus_combination_id>.+)', + ], + }, + { + 'subject_id': '-1', + 'stimulus_combination_id': 'start', + }, + id='eyelink_asc_metadata_patterns', + ), + pytest.param( + { + 'filepath': 'tests/files/eyelink_monocular_example.asc', + 'metadata_patterns': [r'inexistent pattern (?P<value>-?\d+)'], + }, + { + 'value': None, + }, + id='eyelink_asc_metadata_pattern_not_found', + ), + ], +) +def test_from_asc_metadata_patterns(kwargs, expected_metadata): + _, metadata = pm.utils.parsing.parse_eyelink(**kwargs) + + for key, value in expected_metadata.items(): + assert metadata[key] == value + + @pytest.mark.parametrize( 'patterns', [ From 085665856be01577f567d8167130a122178a095b Mon Sep 17 00:00:00 2001 From: SiQube Date: Sun, 17 Nov 2024 19:50:01 +0100 Subject: [PATCH 19/28] move split method to gaze dataframe
--- src/pymovements/dataset/dataset.py | 40 ++++++++++--------------- src/pymovements/gaze/gaze_dataframe.py | 41 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/src/pymovements/dataset/dataset.py b/src/pymovements/dataset/dataset.py index 755892fd..8857b135 100644 --- a/src/pymovements/dataset/dataset.py +++ b/src/pymovements/dataset/dataset.py @@ -235,35 +235,27 @@ def _split_gaze_data( self, by: list[str] | str, ) -> None: - """Split gaze data into seperated GazeDataFrame's. + """Split gaze data into separated GazeDataFrame's. Parameters ---------- by: list[str] | str - Column's to split dataframe by. + Column(s) to split dataframe by. """ - if isinstance(by, str): - by = [by] - new_data = [ - ( - GazeDataFrame( - new_frame, - experiment=_frame.experiment, - trial_columns=self.definition.trial_columns, - time_column=self.definition.time_column, - time_unit=self.definition.time_unit, - position_columns=self.definition.position_columns, - velocity_columns=self.definition.velocity_columns, - acceleration_columns=self.definition.acceleration_columns, - distance_column=self.definition.distance_column, - ), - fileinfo_row, - ) - for (_frame, fileinfo_row) in zip(self.gaze, self.fileinfo['gaze'].to_dicts()) - for new_frame in _frame.frame.partition_by(by=by) - ] - self.gaze = [data[0] for data in new_data] - self.fileinfo['gaze'] = pl.concat([pl.from_dict(data[1]) for data in new_data]) + by = [by] if isinstance(by, str) else by + + fileinfo_dicts = self.fileinfo['gaze'].to_dicts() + + all_gaze_frames = [] + all_fileinfo_rows = [] + + for frame, fileinfo_row in zip(self.gaze, fileinfo_dicts): + split_frames = frame.split(by=by) + all_gaze_frames.extend(split_frames) + all_fileinfo_rows.extend([fileinfo_row] * len(split_frames)) + + self.gaze = all_gaze_frames + self.fileinfo['gaze'] = pl.concat([pl.from_dict(row) for row in all_fileinfo_rows]) def split_precomputed_events( self, diff --git 
a/src/pymovements/gaze/gaze_dataframe.py b/src/pymovements/gaze/gaze_dataframe.py index 7d81c962..3e1a934b 100644 --- a/src/pymovements/gaze/gaze_dataframe.py +++ b/src/pymovements/gaze/gaze_dataframe.py @@ -285,6 +285,14 @@ def __init__( # Remove this attribute once #893 is fixed self._metadata: dict[str, Any] | None = None + self.auto_column_detect = auto_column_detect + self.time_column = time_column + self.time_unit = time_unit + self.pixel_columns = pixel_columns + self.position_columns = position_columns + self.velocity_columns = velocity_columns + self.acceleration_columns = acceleration_columns + self.distance_column = distance_column def apply( self, @@ -307,6 +315,39 @@ def apply( else: raise ValueError(f"unsupported method '{function}'") + def split(self, by: list[str] | str) -> list[GazeDataFrame]: + """Split the GazeDataFrame into multiple frames based on specified column(s). + + Parameters + ---------- + by: list[str] | str + Column name(s) to split the DataFrame by. If a single string is provided, + it will be used as a single column name. If a list is provided, the DataFrame + will be split by unique combinations of values in all specified columns. + + Returns + ------- + list[GazeDataFrame] + A list of new GazeDataFrame instances, each containing a partition of the + original data with all metadata and configurations preserved. 
+ """ + by = [by] if isinstance(by, str) else by + return [ + GazeDataFrame( + new_frame, + experiment=self.experiment, + auto_column_detect=self.auto_column_detect, + trial_columns=self.trial_columns, + time_column=self.time_column, + time_unit=self.time_unit, + position_columns=self.position_columns, + velocity_columns=self.velocity_columns, + acceleration_columns=self.acceleration_columns, + distance_column=self.distance_column, + ) + for new_frame in self.frame.partition_by(by=by) + ] + def transform( self, transform_method: str | Callable[..., pl.Expr], From b47ad312af69f19a1f7f4b50519bdd6c16014df6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 00:29:56 +0100 Subject: [PATCH 20/28] ci: pre-commit autoupdate (#899) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pappasam/toml-sort: v0.23.1 → v0.24.1](https://github.com/pappasam/toml-sort/compare/v0.23.1...v0.24.1) - [github.com/kynan/nbstripout: 0.8.0 → 0.8.1](https://github.com/kynan/nbstripout/compare/0.8.0...0.8.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 24f6d0ae..1af76586 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: databooks-meta args: [--cell-fields-keep=id] - repo: https://github.com/pappasam/toml-sort - rev: v0.23.1 + rev: v0.24.1 hooks: - id: toml-sort-fix - repo: https://github.com/dannysepler/rm_unneeded_f_str @@ -43,7 +43,7 @@ repos: hooks: - id: rm-unneeded-f-str - repo: https://github.com/kynan/nbstripout - rev: 0.8.0 + rev: 0.8.1 hooks: - id: nbstripout - repo: https://github.com/Lucas-C/pre-commit-hooks From 5f5525a001e3902bb865682cf1d5ad1af90bfc04 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 09:02:08 +0100 Subject: [PATCH 21/28] ci: pre-commit autoupdate (#900) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pappasam/toml-sort: v0.24.1 → v0.24.2](https://github.com/pappasam/toml-sort/compare/v0.24.1...v0.24.2) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1af76586..7a9816ed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,7 +35,7 @@ repos: - id: databooks-meta args: [--cell-fields-keep=id] - repo: https://github.com/pappasam/toml-sort - rev: v0.24.1 + rev: v0.24.2 hooks: - id: toml-sort-fix - repo: https://github.com/dannysepler/rm_unneeded_f_str From c30bd9e1c414e98a96995104d1935eb1c8ff10c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20S=C3=A4uberli?= <38892775+saeub@users.noreply.github.com> Date: Wed, 27 Nov 2024 10:15:39 +0100 Subject: [PATCH 22/28] Add trial_columns argument in from_asc() (#898) --- src/pymovements/gaze/io.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py index e68d46fd..aea49987 100644 --- a/src/pymovements/gaze/io.py +++ b/src/pymovements/gaze/io.py @@ -35,7 +35,7 @@ def from_csv( file: str | Path, experiment: Experiment | None = None, *, - trial_columns: list[str] | None = None, + trial_columns: str | list[str] | None = None, time_column: str | None = None, time_unit: str | None = 'ms', pixel_columns: list[str] | None = None, @@ -56,7 +56,7 @@ def from_csv( Path of gaze file. experiment : Experiment | None The experiment definition. 
(default: None) - trial_columns: list[str] | None + trial_columns: str | list[str] | None The name of the trial columns in the input data frame. If the list is empty or None, the input data frame is assumed to contain only one trial. If the list is not empty, the input data frame is assumed to contain multiple trials and the transformation @@ -276,6 +276,7 @@ def from_asc( metadata_patterns: list[dict[str, Any] | str] | None = None, schema: dict[str, Any] | None = None, experiment: Experiment | None = None, + trial_columns: str | list[str] | None = None, add_columns: dict[str, str] | None = None, column_schema_overrides: dict[str, Any] | None = None, ) -> GazeDataFrame: @@ -295,6 +296,11 @@ def from_asc( Dictionary to optionally specify types of columns parsed by patterns. (default: None) experiment: Experiment | None The experiment definition. (default: None) + trial_columns: str | list[str] | None + The names of the columns (extracted by patterns) to use as trial columns. + If the list is empty or None, the asc file is assumed to contain only one trial. + If the list is not empty, the asc file is assumed to contain multiple trials and + the transformation methods will be applied to each trial separately. (default: None) add_columns: dict[str, str] | None Dictionary containing columns to add to loaded data frame. 
(default: None) @@ -434,6 +440,7 @@ def from_asc( gaze_df = GazeDataFrame( gaze_data, experiment=experiment, + trial_columns=trial_columns, time_column='time', time_unit='ms', pixel_columns=['x_pix', 'y_pix'], From 7a25297887eb1347068745b72a8e6258fb0c3812 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 04:55:56 +0100 Subject: [PATCH 23/28] ci: pre-commit autoupdate (#902) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/pylint: v3.3.1 → v3.3.2](https://github.com/PyCQA/pylint/compare/v3.3.1...v3.3.2) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a9816ed..7b3e398f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -121,7 +121,7 @@ repos: '--ignore=D103,D107,D213', ] - repo: https://github.com/PyCQA/pylint - rev: v3.3.1 + rev: v3.3.2 hooks: - id: pylint name: pylint From 96141d58548969db00694294cbaa9861f12673e1 Mon Sep 17 00:00:00 2001 From: "David R. Reich" <43832476+SiQube@users.noreply.github.com> Date: Sun, 8 Dec 2024 15:40:56 +0100 Subject: [PATCH 24/28] docs: add CITATION.cff (#901) --- CITATION.cff | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..2c8e6b9d --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,36 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +preferred-citation: + authors: + - family-names: "Krakowczyk" + given-names: "Daniel G." + - family-names: "Reich" + given-names: "David R." + - family-names: "Chwastek" + given-names: "Jakob" + - family-names: "Jakobi" + given-names: "Deborah N." 
+ - family-names: "Prasse" + given-names: "Paul" + - family-names: "Süss" + given-names: "Assunta" + - family-names: "Turuta" + given-names: "Oleksii" + - family-names: "Kasprowski" + given-names: "Paweł" + - family-names: "Jäger" + given-names: "Lena A." + title: "pymovements: A Python Package for Processing Eye Movement Data" + version: 0.18.0 + type: conference-paper + doi: 10.1145/3588015.3590134 + url: "https://doi.org/10.1145/3588015.3590134" + year: 2023 + collection-type: proceedings + collection-title: "2023 Symposium on Eye Tracking Research and Applications" + collection-location: "Tubingen, Germany" + collection-series: "ETRA '23" + publisher: + name: "Association for Computing Machinery" + address: New York, NY, USA + isbn: "979-8-4007-0150-4/23/05" From 5bf55f1f338013324efa13f0c42b3a99ff525699 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 08:26:47 +0100 Subject: [PATCH 25/28] ci: pre-commit autoupdate (#904) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/jsh9/pydoclint: 0.5.9 → 0.5.10](https://github.com/jsh9/pydoclint/compare/0.5.9...0.5.10) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7b3e398f..3bacd170 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,7 +53,7 @@ repos: args: [--use-current-year] types: [python] - repo: https://github.com/jsh9/pydoclint - rev: 0.5.9 + rev: 0.5.10 hooks: - id: pydoclint args: ["--config=pyproject.toml"] From e4b3e8f4ad0b7fc35d00642f0eb9f922505493ca Mon Sep 17 00:00:00 2001 From: "Daniel G. 
Krakowczyk" Date: Tue, 10 Dec 2024 08:42:56 +0100 Subject: [PATCH 26/28] ci: add dataset section to release drafter (#903) --- .github/release-drafter.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 653ca6fb..c7e9983f 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -15,6 +15,8 @@ categories: labels: deprecation - title: ✨ Enhancements labels: enhancement + - title: 📀 Datasets + labels: dataset - title: 🐞 Bug Fixes labels: fix - title: 🛠️ Maintenance @@ -34,20 +36,23 @@ change-template: '- $TITLE (#$NUMBER)' change-title-escapes: '\<*_&' replacers: # Remove conventional commits from titles - - search: '/- (build|chore|ci|depr|docs|feat|fix|perf|refactor|release|test)(\(.*\))?(\!)?\: /g' + - search: '/- (build|chore|ci|dataset|depr|docs|feat|fix|perf|refactor|release|test)(\(.*\))?(\!)?\: /g' replace: '- ' autolabeler: - label: breaking title: # Example: feat!: ... - - '/^(build|chore|ci|depr|docs|feat|fix|perf|refactor|release|test)(\(.*\))?\!\: /' + - '/^(build|chore|ci|dataset|depr|docs|feat|fix|perf|refactor|release|test)(\(.*\))?\!\: /' - label: build title: - '/^build/' - label: internal title: - '/^(chore|ci|perf|refactor|test)/' + - label: dataset + title: + - '/^dataset/' - label: deprecation title: - '/^depr/' From eb8aee559d6b7eeff8f9ce7768dcbe312ca6ffa7 Mon Sep 17 00:00:00 2001 From: SiQube Date: Sun, 17 Nov 2024 19:50:01 +0100 Subject: [PATCH 27/28] move split method to gaze dataframe --- src/pymovements/dataset/dataset.py | 4 +--- src/pymovements/gaze/gaze_dataframe.py | 17 ++--------------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/src/pymovements/dataset/dataset.py b/src/pymovements/dataset/dataset.py index 8857b135..9a309c7a 100644 --- a/src/pymovements/dataset/dataset.py +++ b/src/pymovements/dataset/dataset.py @@ -231,7 +231,7 @@ def load_precomputed_reading_measures(self) -> None: self.paths, ) - def 
_split_gaze_data( + def split_gaze_data( self, by: list[str] | str, ) -> None: @@ -242,8 +242,6 @@ def _split_gaze_data( by: list[str] | str Column(s) to split dataframe by. """ - by = [by] if isinstance(by, str) else by - fileinfo_dicts = self.fileinfo['gaze'].to_dicts() all_gaze_frames = [] diff --git a/src/pymovements/gaze/gaze_dataframe.py b/src/pymovements/gaze/gaze_dataframe.py index 3e1a934b..e9ee47c3 100644 --- a/src/pymovements/gaze/gaze_dataframe.py +++ b/src/pymovements/gaze/gaze_dataframe.py @@ -286,13 +286,6 @@ def __init__( # Remove this attribute once #893 is fixed self._metadata: dict[str, Any] | None = None self.auto_column_detect = auto_column_detect - self.time_column = time_column - self.time_unit = time_unit - self.pixel_columns = pixel_columns - self.position_columns = position_columns - self.velocity_columns = velocity_columns - self.acceleration_columns = acceleration_columns - self.distance_column = distance_column def apply( self, @@ -331,19 +324,13 @@ def split(self, by: list[str] | str) -> list[GazeDataFrame]: A list of new GazeDataFrame instances, each containing a partition of the original data with all metadata and configurations preserved. 
""" - by = [by] if isinstance(by, str) else by return [ GazeDataFrame( new_frame, experiment=self.experiment, - auto_column_detect=self.auto_column_detect, trial_columns=self.trial_columns, - time_column=self.time_column, - time_unit=self.time_unit, - position_columns=self.position_columns, - velocity_columns=self.velocity_columns, - acceleration_columns=self.acceleration_columns, - distance_column=self.distance_column, + time_column='time', + distance_column='distance', ) for new_frame in self.frame.partition_by(by=by) ] From ecd6b5c13f2714250c879c8c60f136e09a994e67 Mon Sep 17 00:00:00 2001 From: SiQube Date: Sun, 29 Dec 2024 21:56:40 +0100 Subject: [PATCH 28/28] add tests for number of split files only one trial id per gaze dataframe --- tests/unit/dataset/dataset_test.py | 2 +- tests/unit/gaze/gaze_dataframe_test.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/unit/dataset/dataset_test.py b/tests/unit/dataset/dataset_test.py index 17f3e97a..e808292c 100644 --- a/tests/unit/dataset/dataset_test.py +++ b/tests/unit/dataset/dataset_test.py @@ -1977,5 +1977,5 @@ def test_load_split_precomputed_events(precomputed_dataset_configuration, by, ex def test_load_split_gaze(gaze_dataset_configuration, by, expected_len): dataset = pm.Dataset(**gaze_dataset_configuration['init_kwargs']) dataset.load() - dataset._split_gaze_data(by) + dataset.split_gaze_data(by) assert len(dataset.gaze) == expected_len diff --git a/tests/unit/gaze/gaze_dataframe_test.py b/tests/unit/gaze/gaze_dataframe_test.py index d1dd5998..8e91ee03 100644 --- a/tests/unit/gaze/gaze_dataframe_test.py +++ b/tests/unit/gaze/gaze_dataframe_test.py @@ -205,3 +205,25 @@ def test_gaze_dataframe_copy_no_experiment(): # We want to have separate experiment instances but the same values. 
assert gaze.experiment is gaze_copy.experiment + + +def test_gaze_dataframe_split(): + gaze = pm.GazeDataFrame( + pl.DataFrame( + { + 'x': [0, 1, 2, 3], + 'y': [1, 1, 0, 0], + 'trial_id': [0, 1, 1, 2], + }, + schema={'x': pl.Float64, 'y': pl.Float64, 'trial_id': pl.Int8}, + ), + experiment=None, + position_columns=['x', 'y'], + ) + + split_gaze = gaze.split('trial_id') + assert all(gaze_df.frame.n_unique('trial_id') == 1 for gaze_df in split_gaze) + assert len(split_gaze) == 3 + assert_frame_equal(gaze.frame.filter(pl.col('trial_id') == 0), split_gaze[0].frame) + assert_frame_equal(gaze.frame.filter(pl.col('trial_id') == 1), split_gaze[1].frame) + assert_frame_equal(gaze.frame.filter(pl.col('trial_id') == 2), split_gaze[2].frame)