Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for multiple recording specs per file to gaze.from_asc() #887

Draft
wants to merge 24 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
92f5b21
adapted parsing.py to record in the metadata dict all tracked eye sid…
saphjra Oct 25, 2024
8b99481
Merge remote-tracking branch 'origin/main' into feature/metadata
saphjra Oct 25, 2024
a3f70c1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 25, 2024
1b6c59d
changed the Regex patter matching logic such that all recording confi…
saphjra Oct 25, 2024
dcc2d64
Merge branch 'main' into feature/metadata
saphjra Nov 6, 2024
78077e5
changed the Regex patter matching logic such that all recording confi…
saphjra Nov 6, 2024
14a6f9f
Merge remote-tracking branch 'origin/feature/metadata' into feature/m…
saphjra Nov 6, 2024
c11db5d
trying to solve the test situation
saphjra Nov 6, 2024
10cce18
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
86f67b2
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
8542fc1
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
95f6b3e
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
ad168c8
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
ec9f99d
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
18414fb
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
8d1a7e6
changed _calculate_data_loss sampling_rate variable, to reflect the c…
saphjra Nov 7, 2024
8dafbee
adapted io.py to pass tests
saphjra Nov 7, 2024
0267ad1
adapted io.py to pass tests
saphjra Nov 7, 2024
253d9f2
modified parsing_test.py according to comments
saphjra Nov 20, 2024
67d07de
added consistency check for sampling rate
saphjra Nov 20, 2024
034c5ab
changed inconsistency_check to include a print statement.
saphjra Nov 20, 2024
25a8693
changed inconsistency_check to include a print statement.
saphjra Nov 20, 2024
9acb926
changed inconsistency_check to include a print statement.
saphjra Nov 20, 2024
5e4c659
changed inconsistency_check to include a print statement.
saphjra Nov 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/pymovements/gaze/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def from_asc(
│ 2339290 ┆ 618.0 ┆ [637.6, 531.4] │
│ 2339291 ┆ 618.0 ┆ [637.3, 531.2] │
└─────────┴───────┴────────────────┘
>>> metadata['sampling_rate']
>>> float(metadata["recording_config"][0]['sampling_rate'])
1000.0
"""
if isinstance(patterns, str):
Expand Down
64 changes: 42 additions & 22 deletions src/pymovements/utils/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,6 @@
),
r'\*\*\s+(?P<version_2>EYELINK.*)',
r'MSG\s+\d+[.]?\d*\s+DISPLAY_COORDS\s+(?P<resolution>.*)',
(
r'MSG\s+\d+[.]?\d*\s+RECCFG\s+(?P<tracking_mode>[A-Z,a-z]+)\s+'
r'(?P<sampling_rate>\d+)\s+'
r'(?P<file_sample_filter>(0|1|2))\s+'
r'(?P<link_sample_filter>(0|1|2))\s+'
r'(?P<tracked_eye>(L|R|LR))\s*'
),
r'PUPIL\s+(?P<pupil_data_type>(AREA|DIAMETER))\s*',
r'MSG\s+\d+[.]?\d*\s+ELCLCFG\s+(?P<mount_configuration>.*)',
)
Expand Down Expand Up @@ -93,6 +86,14 @@
r'END\s+(?P<timestamp>(\d+[.]?\d*))\s+\s+(?P<types>.*)\s+RES\s+'
r'(?P<xres>[\d\.]*)\s+(?P<yres>[\d\.]*)\s*',
)
# Matches an EyeLink recording-configuration message line such as
# ``MSG 2154555 RECCFG CR 1000 2 1 L`` or ``MSG 10000013.5 RECCFG CR 2000 1 1 R``.
#
# Named groups (all captured as strings):
#   timestamp          -- message timestamp, integer or decimal
#   tracking_mode      -- alphabetic tracking mode token (e.g. ``CR``)
#   sampling_rate      -- integer sampling rate
#   file_sample_filter -- one of ``0``, ``1``, ``2``
#   link_sample_filter -- one of ``0``, ``1``, ``2``
#   tracked_eye        -- ``L``, ``R`` or ``LR``
#
# The tracking-mode class is ``[A-Za-z]``; a comma inside the brackets would be
# matched literally rather than acting as a separator between the two ranges.
RECORDING_CONFIG = re.compile(
    r'MSG\s+(?P<timestamp>\d+[.]?\d*)\s+'
    r'RECCFG\s+(?P<tracking_mode>[A-Za-z]+)\s+'
    r'(?P<sampling_rate>\d+)\s+'
    r'(?P<file_sample_filter>0|1|2)\s+'
    r'(?P<link_sample_filter>0|1|2)\s+'
    # ``LR`` is tried before the single-letter alternatives so that a
    # two-letter eye specification is captured in full.
    r'(?P<tracked_eye>LR|[LR])\s*',
)


def check_nan(sample_location: str) -> float:
Expand Down Expand Up @@ -247,15 +248,14 @@ def parse_eyelink(
calibrations = []
blinks = []
invalid_samples = []

recording_config = []
blink = False

start_recording_timestamp = ''
total_recording_duration = 0.0
num_blink_samples = 0

for line in lines:

for pattern_dict in compiled_patterns:

if match := pattern_dict['pattern'].match(line):
Expand Down Expand Up @@ -296,6 +296,9 @@ def parse_eyelink(
num_blink_samples = 0
blinks.append(blink_info)

elif eye_side_match := RECORDING_CONFIG.match(line):
recording_config.append(eye_side_match.groupdict())

elif match := START_RECORDING_REGEX.match(line):
start_recording_timestamp = match.groupdict()['timestamp']

Expand Down Expand Up @@ -349,19 +352,20 @@ def parse_eyelink(
# each metadata pattern should only match once
compiled_metadata_patterns.remove(pattern_dict)

if not metadata:
raise Warning('No metadata found. Please check the file for errors.')

# if the sampling rate is not found, we cannot calculate the data loss
actual_number_of_samples = len(samples['time'])
# if we don't have any recording config, we cannot calculate the data loss
sampling_rate = _check_sampling_rate(recording_config)

data_loss_ratio, data_loss_ratio_blinks = _calculate_data_loss(
blinks=blinks,
invalid_samples=invalid_samples,
actual_num_samples=actual_number_of_samples,
total_rec_duration=total_recording_duration,
sampling_rate=metadata['sampling_rate'],
sampling_rate=sampling_rate,
)
if not metadata:
raise Warning('No metadata found. Please check the file for errors.')

pre_processed_metadata: dict[str, Any] = _pre_process_metadata(metadata)
# is not yet pre-processed but should be
Expand All @@ -371,6 +375,7 @@ def parse_eyelink(
pre_processed_metadata['data_loss_ratio'] = data_loss_ratio
pre_processed_metadata['data_loss_ratio_blinks'] = data_loss_ratio_blinks
pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration
pre_processed_metadata['recording_config'] = recording_config

schema_overrides = {
'time': pl.Float64,
Expand Down Expand Up @@ -409,11 +414,6 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]:
resolution = (coordinates[2] - coordinates[0] + 1, coordinates[3] - coordinates[1] + 1)
metadata['resolution'] = resolution

if metadata['sampling_rate']:
metadata['sampling_rate'] = float(metadata['sampling_rate'])
else:
metadata['sampling_rate'] = 'unknown'

# if the date has been parsed fully, convert the date to a datetime object
if 'day' in metadata and 'year' in metadata and 'month' in metadata and 'time' in metadata:
metadata['day'] = int(metadata['day'])
Expand All @@ -433,12 +433,32 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]:
return return_metadata


def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None:
"""Check if the sampling rate is available in the recording config.

Parameters
----------
recording_config : list[dict[str, Any]]
List of dictionaries containing recording configuration details.

Returns
-------
float | None
The sampling rate as a float if available, otherwise None.
"""
if not recording_config:
sampling_rate = None
else:
sampling_rate = float(recording_config[0]['sampling_rate'])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's sufficient for now to just check for consistency and raise a warning if it's inconsistent.

We can improve on that in a follow-up. Moreover, the logic for calculating data loss will be moved away from this module into the measure module. This way users will be able to calculate these measures on any GazeDataFrame not just when parsed via from_asc().

return sampling_rate


def _calculate_data_loss(
blinks: list[dict[str, Any]],
invalid_samples: list[str],
actual_num_samples: int,
total_rec_duration: float,
sampling_rate: float,
total_rec_duration: float | None = None,
sampling_rate: float | None = None,
) -> tuple[float | str, float | str]:
"""Calculate data loss and blink loss.

Expand All @@ -450,9 +470,9 @@ def _calculate_data_loss(
List of invalid samples.
actual_num_samples: int
Number of actual samples recorded.
total_rec_duration: float
total_rec_duration: float | None
Total duration of the recording.
sampling_rate: float
sampling_rate: float | None
Sampling rate of the eye tracker.

Returns
Expand Down
66 changes: 56 additions & 10 deletions tests/unit/utils/parsing_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2024 The pymovements Project Authors
#
saphjra marked this conversation as resolved.
Show resolved Hide resolved
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -17,6 +17,25 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Copyright (c) 2023-2024 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Tests pymovements asc to csv processing."""
import datetime
from pathlib import Path
Expand Down Expand Up @@ -65,6 +84,7 @@
START 10000004 RIGHT SAMPLES EVENTS
10000004 850.7 717.5 714.0 0.0 ...
END 10000005 SAMPLES EVENTS RES 38.54 31.12
MSG 10000013.5 RECCFG CR 2000 1 1 R
MSG 10000005 METADATA_1 123
MSG 10000005 START_B
the next line now should have the task column set to B
Expand Down Expand Up @@ -102,6 +122,7 @@
EBLINK R 10000018 10000020 2
10000021 . . 0.0 0.0 ...
END 10000022 SAMPLES EVENTS RES 38.54 31.12

"""

PATTERNS = [
Expand Down Expand Up @@ -160,12 +181,7 @@
'version_2': 'EYELINK II CL v6.12 Feb 1 2018 (EyeLink Portable Duo)',
'model': 'EyeLink Portable Duo',
'version_number': '6.12',
'sampling_rate': 1000.00,
'file_sample_filter': '2',
'link_sample_filter': '1',
'pupil_data_type': 'AREA',
'tracking_mode': 'CR',
'tracked_eye': 'L',
'calibrations': [],
'validations': [],
'resolution': (1280, 1024),
Expand All @@ -189,6 +205,24 @@
'metadata_2': 'abc',
'metadata_3': True,
'metadata_4': None,
'recording_config': [
{
'sampling_rate': '1000', # MSG 2154555 RECCFG CR 1000 2 1 L
'file_sample_filter': '2',
'link_sample_filter': '1',
'timestamp': '2154555',
'tracked_eye': 'L',
'tracking_mode': 'CR',
},
{
'sampling_rate': '2000', # MSG 10000013.5 RECCFG CR 2000 1 1 R
'file_sample_filter': '1',
'link_sample_filter': '1',
'timestamp': '10000013.5',
'tracked_eye': 'R',
'tracking_mode': 'CR',
},
],
}


Expand Down Expand Up @@ -425,7 +459,9 @@ def test_parse_val_cal_eyelink_monocular_file():
('metadata', 'expected_blinks'),
[
pytest.param(
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n'
'SBLINK R 10000018\n'
'10000019 . . 0.0 0.0 ...\n'
Expand Down Expand Up @@ -497,15 +533,17 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
('metadata', 'expected_blink_ratio', 'expected_overall_ratio'),
[
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000018 RIGHT SAMPLES EVENTS\n'
'SBLINK R 10000018\n'
'10000019 . . 0.0 0.0 ...\n'
'10000020 . . 0.0 0.0 ...\n'
'EBLINK R 10000018 10000020 2\n'
'END 10000020 SAMPLES EVENTS RES 38.54 31.12\n',
1,
1,
# asc snippet which gets processed by the function
1, # expected_blink_ratio
1, # expected_overall_ratio
id='only_blinks',
),
pytest.param(
Expand All @@ -521,6 +559,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
id='unknown_sampling_rate_only_blinks',
),
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000018 RIGHT SAMPLES EVENTS\n'
'10000019 . . 0.0 0.0 ...\n'
Expand All @@ -530,6 +569,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
id='lost_samples_no_blinks',
),
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000018 RIGHT SAMPLES EVENTS\n'
'SBLINK R 10000018\n'
Expand All @@ -552,6 +592,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
id='lost_samples_no_sampling_rate',
),
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000018 RIGHT SAMPLES EVENTS\n'
'10000019 850.7 717.5 714.0 0.0 ...\n'
Expand All @@ -563,6 +604,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
id='missing_timestamps',
),
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000018 RIGHT SAMPLES EVENTS\n'
'10000019 850.7 717.5 714.0 0.0 ...\n'
Expand All @@ -574,6 +616,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
id='missing_timestamps_lost_samples',
),
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000020 RIGHT SAMPLES EVENTS\n'
'10000020 850.7 717.5 714.0 0.0 ...\n'
Expand All @@ -584,9 +627,10 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
'END 10000024 SAMPLES EVENTS RES 38.54 31.12\n',
0.25,
0.75,
id='missing_timestamps_lost_samples',
id='missing_timestamps_lost_samples4',
),
saphjra marked this conversation as resolved.
Show resolved Hide resolved
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these added date strings aren't really necessary, right?
It really doesn't matter in this case and you don't need to revert them, but usually I would advise against changing existing test logic, e.g. changing test values.

Your other changes here, like adding documentation or changing test ids, are of course the spirit that we need! 🥇

Copy link
Collaborator Author

@saphjra saphjra Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for all your feedback 😄

The thing is, without this additional line the metadata will be empty, since the MSG line now gets parsed into recording_config and no longer into metadata. However, if metadata is empty/None, the code raises a warning, and all the tests where I added the DATE line would fail because of that.

line 367
"""
if not metadata:
raise Warning('No metadata found. Please check the file for errors.')
"""
So I figured I would add a line that should be present in any dataset and that gets parsed into the metadata.

Is there a better way to solve this?

'SBLINK R 10000018\n'
'10000019 . . 0.0 0.0 ...\n'
Expand All @@ -599,12 +643,13 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks):
id='blinks_and_lost_samples_no_start_end',
),
pytest.param(
'** DATE: Wed Mar 8 09:25:20 2023\n'
'MSG 2154555 RECCFG CR 1000 2 1 L\n'
'START 10000020 RIGHT SAMPLES EVENTS\n'
'END 10000021 SAMPLES EVENTS RES 38.54 31.12\n',
0,
1,
id='missing_timestamps_lost_samples',
id='missing_timestamps_lost_samples12',
),
],
)
Expand All @@ -615,6 +660,7 @@ def test_parse_eyelink_data_loss_ratio(
filepath.write_text(metadata)

_, parsed_metadata = pm.utils.parsing.parse_eyelink(filepath)
print(f'parsed_metadata: {parsed_metadata}, \nmetadata: {metadata}')

saphjra marked this conversation as resolved.
Show resolved Hide resolved
assert parsed_metadata['data_loss_ratio_blinks'] == expected_blink_ratio
assert parsed_metadata['data_loss_ratio'] == expected_overall_ratio
Expand Down
Loading