From 92f5b2112b5e588ae4b1fcfdf3e3ef452db70d70 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:45:11 +0200 Subject: [PATCH 01/21] adapted parsing.py to record in the metadata dict all tracked eye side, instead of only one --- src/pymovements/utils/parsing.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 849cb864..f2a57afc 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -53,7 +53,6 @@ r'(?P\d+)\s+' r'(?P(0|1|2))\s+' r'(?P(0|1|2))\s+' - r'(?P(L|R|LR))\s*' ), r'PUPIL\s+(?P(AREA|DIAMETER))\s*', r'MSG\s+\d+[.]?\d*\s+ELCLCFG\s+(?P.*)', @@ -93,8 +92,11 @@ r'END\s+(?P(\d+[.]?\d*))\s+\s+(?P.*)\s+RES\s+' r'(?P[\d\.]*)\s+(?P[\d\.]*)\s*', ) - - +TRACKED_EYE_REGEX = re.compile( + r'MSG\s+(?P\d+[.]?\d*)\s+' + r'RECCFG\s+[A-Z,a-z]+\s+\d+\s+(0|1|2)\s+(0|1|2)\s+' + r'(?P(L|R|LR))\s*', +) def check_nan(sample_location: str) -> float: """Return position as float or np.nan depending on validity of sample. @@ -247,7 +249,7 @@ def parse_eyelink( calibrations = [] blinks = [] invalid_samples = [] - + tracked_eyes = [] blink = False start_recording_timestamp = '' @@ -305,6 +307,10 @@ def parse_eyelink( total_recording_duration += block_duration + elif eye_side_match := TRACKED_EYE_REGEX.match(line): + tracked_eyes.append(eye_side_match.groupdict()) + + elif eye_tracking_sample_match := EYE_TRACKING_SAMPLE.match(line): timestamp_s = eye_tracking_sample_match.group('time') @@ -371,6 +377,7 @@ def parse_eyelink( pre_processed_metadata['data_loss_ratio'] = data_loss_ratio pre_processed_metadata['data_loss_ratio_blinks'] = data_loss_ratio_blinks pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration + pre_processed_metadata['tracked_eyes'] = tracked_eyes schema_overrides = { 'time': pl.Float64, From a3f70c19fb51526030193041e963210da166afaf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 25 Oct 2024 10:49:58 +0000 Subject: [PATCH 02/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pymovements/utils/parsing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index f2a57afc..2969182d 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -97,6 +97,8 @@ r'RECCFG\s+[A-Z,a-z]+\s+\d+\s+(0|1|2)\s+(0|1|2)\s+' r'(?P(L|R|LR))\s*', ) + + def check_nan(sample_location: str) -> float: """Return position as float or np.nan depending on validity of sample. @@ -310,7 +312,6 @@ def parse_eyelink( elif eye_side_match := TRACKED_EYE_REGEX.match(line): tracked_eyes.append(eye_side_match.groupdict()) - elif eye_tracking_sample_match := EYE_TRACKING_SAMPLE.match(line): timestamp_s = eye_tracking_sample_match.group('time') From 1b6c59d760595c14c0eb7195e1b4b5c9bf967068 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:53:09 +0200 Subject: [PATCH 03/21] changed the Regex patter matching logic such that all recording configuration are match continously instead of only once --- src/pymovements/utils/parsing.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 2969182d..2878e010 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -48,12 +48,7 @@ ), r'\*\*\s+(?PEYELINK.*)', r'MSG\s+\d+[.]?\d*\s+DISPLAY_COORDS\s+(?P.*)', - ( - r'MSG\s+\d+[.]?\d*\s+RECCFG\s+(?P[A-Z,a-z]+)\s+' - r'(?P\d+)\s+' - r'(?P(0|1|2))\s+' - r'(?P(0|1|2))\s+' - ), + r'PUPIL\s+(?P(AREA|DIAMETER))\s*', r'MSG\s+\d+[.]?\d*\s+ELCLCFG\s+(?P.*)', ) @@ -92,10 +87,13 @@ r'END\s+(?P(\d+[.]?\d*))\s+\s+(?P.*)\s+RES\s+' r'(?P[\d\.]*)\s+(?P[\d\.]*)\s*', ) -TRACKED_EYE_REGEX = re.compile( - r'MSG\s+(?P\d+[.]?\d*)\s+' - r'RECCFG\s+[A-Z,a-z]+\s+\d+\s+(0|1|2)\s+(0|1|2)\s+' - r'(?P(L|R|LR))\s*', +RECORDING_CONFIG = re.compile( + r'MSG\s+(?P\d+[.]?\d*)\s+' + r'RECCFG\s+(?P[A-Z,a-z]+)\s+' + r'(?P\d+)\s+' + r'(?P(0|1|2))\s+' + r'(?P(0|1|2))\s+' + r'(?P(L|R|LR))\s*' ) @@ -251,7 +249,7 @@ def parse_eyelink( calibrations = [] blinks = [] invalid_samples = [] - tracked_eyes = [] + recording_config = [] blink = False start_recording_timestamp = '' @@ -309,8 +307,8 @@ def parse_eyelink( total_recording_duration += block_duration - elif eye_side_match := TRACKED_EYE_REGEX.match(line): - tracked_eyes.append(eye_side_match.groupdict()) + elif eye_side_match := RECORDING_CONFIG.match(line): + recording_config.append(eye_side_match.groupdict()) elif eye_tracking_sample_match := EYE_TRACKING_SAMPLE.match(line): @@ -378,7 +376,7 @@ def parse_eyelink( pre_processed_metadata['data_loss_ratio'] = data_loss_ratio pre_processed_metadata['data_loss_ratio_blinks'] = data_loss_ratio_blinks pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration - pre_processed_metadata['tracked_eyes'] = tracked_eyes + pre_processed_metadata['recording_config'] = recording_config schema_overrides = { 'time': pl.Float64, From 78077e56e7efe7d3b36ed212f98f82865e72eec8 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 6 Nov 2024 11:36:00 +0100 Subject: [PATCH 04/21] changed the Regex patter matching logic such that all recording configuration are match continously instead of only once --- src/pymovements/utils/parsing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 2878e010..4a1bde57 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -88,12 +88,12 @@ r'(?P[\d\.]*)\s+(?P[\d\.]*)\s*', ) RECORDING_CONFIG = re.compile( - r'MSG\s+(?P\d+[.]?\d*)\s+' - r'RECCFG\s+(?P[A-Z,a-z]+)\s+' - r'(?P\d+)\s+' - r'(?P(0|1|2))\s+' - r'(?P(0|1|2))\s+' - r'(?P(L|R|LR))\s*' + r'MSG\s+(?P\d+[.]?\d*)\s+' + r'RECCFG\s+(?P[A-Z,a-z]+)\s+' + r'(?P\d+)\s+' + r'(?P(0|1|2))\s+' + r'(?P(0|1|2))\s+' + r'(?P(L|R|LR))\s*', ) From c11db5d7b04bdf2af82e4655d2291930a498af4e Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:34:25 +0100 Subject: [PATCH 05/21] trying to solve the test situation --- src/pymovements/utils/parsing.py | 32 +++++++++++----- tests/unit/utils/parsing_test.py | 66 +++++++++++++++++++------------- 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 4a1bde57..3532ce0d 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -48,7 +48,6 @@ ), r'\*\*\s+(?PEYELINK.*)', r'MSG\s+\d+[.]?\d*\s+DISPLAY_COORDS\s+(?P.*)', - r'PUPIL\s+(?P(AREA|DIAMETER))\s*', r'MSG\s+\d+[.]?\d*\s+ELCLCFG\s+(?P.*)', ) @@ -93,7 +92,7 @@ r'(?P\d+)\s+' r'(?P(0|1|2))\s+' r'(?P(0|1|2))\s+' - r'(?P(L|R|LR))\s*', + r'(?P(L|R|S))\s*', ) @@ -232,10 +231,11 @@ def parse_eyelink( with open(filepath, encoding='ascii') as asc_file: lines = asc_file.readlines() + print(" ascifile lines", lines) # will return an empty string if the key does not exist metadata: defaultdict = defaultdict(str) - + print("metadat initial: ", metadata) # metadata keys specified by the user should have a default value of None metadata_keys = get_pattern_keys(compiled_metadata_patterns, 'key') for key in metadata_keys: @@ -257,9 +257,10 @@ def parse_eyelink( num_blink_samples = 0 for line in lines: - + print("line in asci file: ", line) for pattern_dict in compiled_patterns: + if match := pattern_dict['pattern'].match(line): if 'value' in pattern_dict: current_column = pattern_dict['column'] @@ -284,10 +285,12 @@ def parse_eyelink( cal_timestamp = '' elif BLINK_START_REGEX.match(line): + print('blink start') blink = True elif match := BLINK_STOP_REGEX.match(line): blink = False + print('blink end') parsed_blink = match.groupdict() blink_info = { 'start_timestamp': float(parsed_blink['timestamp_start']), @@ -297,6 +300,7 @@ def parse_eyelink( } num_blink_samples = 0 blinks.append(blink_info) + print('blinks: ', blinks) elif match := START_RECORDING_REGEX.match(line): start_recording_timestamp = match.groupdict()['timestamp'] @@ -307,8 +311,6 @@ def parse_eyelink( total_recording_duration += block_duration - elif eye_side_match := RECORDING_CONFIG.match(line): - recording_config.append(eye_side_match.groupdict()) elif eye_tracking_sample_match := EYE_TRACKING_SAMPLE.match(line): @@ -352,10 +354,17 @@ def parse_eyelink( metadata.update(match.groupdict()) # each metadata pattern should only match once + print("comqiled metadata patterns before removing: ", compiled_metadata_patterns) compiled_metadata_patterns.remove(pattern_dict) + print("comqiled metadata patternsafter removing : ", compiled_metadata_patterns) + + elif eye_side_match := RECORDING_CONFIG.match(line): + print("i matched recording config") + recording_config.append(eye_side_match.groupdict()) + + print("recording config:", recording_config, "Regex: ", RECORDING_CONFIG) + print("metadata: ", metadata) - if not metadata: - raise Warning('No metadata found. Please check the file for errors.') # if the sampling rate is not found, we cannot calculate the data loss actual_number_of_samples = len(samples['time']) @@ -365,7 +374,7 @@ def parse_eyelink( invalid_samples=invalid_samples, actual_num_samples=actual_number_of_samples, total_rec_duration=total_recording_duration, - sampling_rate=metadata['sampling_rate'], + sampling_rate=recording_config[0]['sampling_rate'], ) pre_processed_metadata: dict[str, Any] = _pre_process_metadata(metadata) @@ -377,7 +386,9 @@ def parse_eyelink( pre_processed_metadata['data_loss_ratio_blinks'] = data_loss_ratio_blinks pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration pre_processed_metadata['recording_config'] = recording_config - + print("pre_processed_metadata: ", pre_processed_metadata) + if not metadata: + raise Warning('No metadata found. Please check the file for errors.') schema_overrides = { 'time': pl.Float64, 'x_pix': pl.Float64, @@ -467,6 +478,7 @@ def _calculate_data_loss( Data loss ratio and blink loss ratio. """ if not sampling_rate or not total_rec_duration: + print(f'samlpingrate {sampling_rate}, total_rec_tim:', total_rec_duration) return 'unknown', 'unknown' dl_ratio_blinks = 0.0 diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index f33e89fd..d551f40a 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -65,6 +65,7 @@ START 10000004 RIGHT SAMPLES EVENTS 10000004 850.7 717.5 714.0 0.0 ... END 10000005 SAMPLES EVENTS RES 38.54 31.12 +MSG 10000005 RECCFG CR 2000 1 1 R MSG 10000005 METADATA_1 123 MSG 10000005 START_B the next line now should have the task column set to B @@ -102,6 +103,7 @@ EBLINK R 10000018 10000020 2 10000021 . . 0.0 0.0 ... END 10000022 SAMPLES EVENTS RES 38.54 31.12 + """ PATTERNS = [ @@ -160,12 +162,7 @@ 'version_2': 'EYELINK II CL v6.12 Feb 1 2018 (EyeLink Portable Duo)', 'model': 'EyeLink Portable Duo', 'version_number': '6.12', - 'sampling_rate': 1000.00, - 'file_sample_filter': '2', - 'link_sample_filter': '1', 'pupil_data_type': 'AREA', - 'tracking_mode': 'CR', - 'tracked_eye': 'L', 'calibrations': [], 'validations': [], 'resolution': (1280, 1024), @@ -189,7 +186,21 @@ 'metadata_2': 'abc', 'metadata_3': True, 'metadata_4': None, -} + 'recording_config': [{ + 'sampling_rate': '1000', # MSG 2154555 RECCFG CR 1000 2 1 L + 'file_sample_filter': '2', + 'link_sample_filter': '1', + 'timestamp': '2154555', + 'tracked_eye': 'L', + 'tracking_mode': 'CR', + }, + {'sampling_rate': '2000', # MSG 10000013.5 RECCFG CR 2000 1 1 R + 'file_sample_filter': '1', + 'link_sample_filter': '1', + 'timestamp': '10000013.5', + 'tracked_eye': 'R', + 'tracking_mode': 'CR', + }]} def test_parse_eyelink(tmp_path): @@ -401,7 +412,7 @@ def test_val_cal_eyelink(tmp_path, metadata, expected_validation, expected_calib def test_parse_val_cal_eyelink_monocular_file(): example_asc_monocular_path = Path(__file__).parent.parent.parent / \ - 'files/eyelink_monocular_example.asc' + 'files/eyelink_monocular_example.asc' _, metadata = pm.utils.parsing.parse_eyelink(example_asc_monocular_path) @@ -424,21 +435,22 @@ def test_parse_val_cal_eyelink_monocular_file(): @pytest.mark.parametrize( ('metadata', 'expected_blinks'), [ - pytest.param( - '** DATE: Wed Mar 8 09:25:20 2023\n' - 'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n' - 'SBLINK R 10000018\n' - '10000019 . . 0.0 0.0 ...\n' - '10000020 . . 0.0 0.0 ...\n' - 'EBLINK R 10000018 10000020 2\n', - [{ - 'duration_ms': 2, - 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 10000020, - }], - id='blink', - ), + pytest.param('MSG 2154555 RECCFG CR 1000 2 1 L\n' + '** DATE: Wed Mar 8 09:25:20 2023\n' + 'MSG 2154555 RECCFG CR 1000 2 1 L\n' + 'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n' + 'SBLINK R 10000018\n' + '10000019 . . 0.0 0.0 ...\n' + '10000020 . . 0.0 0.0 ...\n' + 'EBLINK R 10000018 10000020 2\n', + [{ + 'duration_ms': 2, + 'num_samples': 2, + 'start_timestamp': 10000018, + 'stop_timestamp': 10000020, + }], + id='blink', + ), pytest.param( '** DATE: Wed Mar 8 09:25:20 2023\n' 'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n' @@ -504,8 +516,9 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): '10000020 . . 0.0 0.0 ...\n' 'EBLINK R 10000018 10000020 2\n' 'END 10000020 SAMPLES EVENTS RES 38.54 31.12\n', - 1, - 1, + # asc snipped which gets processed by the function + 1, # expected_blink_ratio + 1, # expected_overall_ratio id='only_blinks', ), pytest.param( @@ -584,7 +597,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): 'END 10000024 SAMPLES EVENTS RES 38.54 31.12\n', 0.25, 0.75, - id='missing_timestamps_lost_samples', + id='missing_timestamps_lost_samples4', ), pytest.param( 'MSG 2154555 RECCFG CR 1000 2 1 L\n' @@ -604,7 +617,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): 'END 10000021 SAMPLES EVENTS RES 38.54 31.12\n', 0, 1, - id='missing_timestamps_lost_samples', + id='missing_timestamps_lost_samples12', ), ], ) @@ -615,6 +628,7 @@ def test_parse_eyelink_data_loss_ratio( filepath.write_text(metadata) _, parsed_metadata = pm.utils.parsing.parse_eyelink(filepath) + print(f"parsed_metadata: {parsed_metadata}, \nmetadata: {metadata}") assert parsed_metadata['data_loss_ratio_blinks'] == expected_blink_ratio assert parsed_metadata['data_loss_ratio'] == expected_overall_ratio From 10cce1839828cb17bd3c12bb5bcfdcbb116638f5 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:23:27 +0100 Subject: [PATCH 06/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes --- src/pymovements/utils/parsing.py | 36 +++++++++++++++++++------------- tests/unit/utils/parsing_test.py | 13 +++++++++--- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 3532ce0d..14f9e8b4 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -90,11 +90,13 @@ r'MSG\s+(?P\d+[.]?\d*)\s+' r'RECCFG\s+(?P[A-Z,a-z]+)\s+' r'(?P\d+)\s+' - r'(?P(0|1|2))\s+' - r'(?P(0|1|2))\s+' - r'(?P(L|R|S))\s*', + r'(?P0|1|2)\s+' + r'(?P0|1|2)\s+' + r'(?PLR|[LR])\s*', #Todo retun to initial state of the regex ) +RECORDING_CONFIG_new = re.compile(r'MSG\s+(?P\d+[.]?\d?)\s+RECCFG\s+(?P[A-Z,a-z]+)\s+(?P\d+)\s+(?P0|1|2)\s+(?P0|1|2)\s+(?PLR|[RL])\s*') + def check_nan(sample_location: str) -> float: """Return position as float or np.nan depending on validity of sample. @@ -302,6 +304,10 @@ def parse_eyelink( blinks.append(blink_info) print('blinks: ', blinks) + elif eye_side_match := RECORDING_CONFIG.match(line): + print("i matched recording config") + recording_config.append(eye_side_match.groupdict()) + elif match := START_RECORDING_REGEX.match(line): start_recording_timestamp = match.groupdict()['timestamp'] @@ -354,29 +360,34 @@ def parse_eyelink( metadata.update(match.groupdict()) # each metadata pattern should only match once - print("comqiled metadata patterns before removing: ", compiled_metadata_patterns) + #print("comqiled metadata patterns before removing: ", compiled_metadata_patterns) compiled_metadata_patterns.remove(pattern_dict) - print("comqiled metadata patternsafter removing : ", compiled_metadata_patterns) + #print("comqiled metadata patternsafter removing : ", compiled_metadata_patterns) - elif eye_side_match := RECORDING_CONFIG.match(line): - print("i matched recording config") - recording_config.append(eye_side_match.groupdict()) print("recording config:", recording_config, "Regex: ", RECORDING_CONFIG) + print("blinks:", blinks, "Regex: ", BLINK_STOP_REGEX) print("metadata: ", metadata) # if the sampling rate is not found, we cannot calculate the data loss actual_number_of_samples = len(samples['time']) + # if we don't have any recording config, we cannot calculate the data loss + if not recording_config: + actual_sampling_rate = None + + else: + actual_sampling_rate = float(recording_config[0]['sampling_rate']) data_loss_ratio, data_loss_ratio_blinks = _calculate_data_loss( blinks=blinks, invalid_samples=invalid_samples, actual_num_samples=actual_number_of_samples, total_rec_duration=total_recording_duration, - sampling_rate=recording_config[0]['sampling_rate'], + sampling_rate=actual_sampling_rate, ) - + if not metadata: + raise Warning('No metadata found. Please check the file for errors.') pre_processed_metadata: dict[str, Any] = _pre_process_metadata(metadata) # is not yet pre-processed but should be pre_processed_metadata['calibrations'] = calibrations @@ -426,10 +437,7 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]: resolution = (coordinates[2] - coordinates[0] + 1, coordinates[3] - coordinates[1] + 1) metadata['resolution'] = resolution - if metadata['sampling_rate']: - metadata['sampling_rate'] = float(metadata['sampling_rate']) - else: - metadata['sampling_rate'] = 'unknown' + # if the date has been parsed fully, convert the date to a datetime object if 'day' in metadata and 'year' in metadata and 'month' in metadata and 'time' in metadata: diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index d551f40a..ff8fe7f8 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -9,7 +9,7 @@ # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +#git # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -65,7 +65,7 @@ START 10000004 RIGHT SAMPLES EVENTS 10000004 850.7 717.5 714.0 0.0 ... END 10000005 SAMPLES EVENTS RES 38.54 31.12 -MSG 10000005 RECCFG CR 2000 1 1 R +MSG 10000013.5 RECCFG CR 2000 1 1 R MSG 10000005 METADATA_1 123 MSG 10000005 START_B the next line now should have the task column set to B @@ -508,7 +508,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): @pytest.mark.parametrize( ('metadata', 'expected_blink_ratio', 'expected_overall_ratio'), [ - pytest.param( + pytest.param( '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' 'SBLINK R 10000018\n' @@ -534,6 +534,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='unknown_sampling_rate_only_blinks', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' '10000019 . . 0.0 0.0 ...\n' @@ -543,6 +544,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='lost_samples_no_blinks', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' 'SBLINK R 10000018\n' @@ -565,6 +567,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='lost_samples_no_sampling_rate', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' '10000019 850.7 717.5 714.0 0.0 ...\n' @@ -576,6 +579,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='missing_timestamps', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' '10000019 850.7 717.5 714.0 0.0 ...\n' @@ -587,6 +591,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='missing_timestamps_lost_samples', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000020 RIGHT SAMPLES EVENTS\n' '10000020 850.7 717.5 714.0 0.0 ...\n' @@ -600,6 +605,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='missing_timestamps_lost_samples4', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'SBLINK R 10000018\n' '10000019 . . 0.0 0.0 ...\n' @@ -612,6 +618,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='blinks_and_lost_samples_no_start_end', ), pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000020 RIGHT SAMPLES EVENTS\n' 'END 10000021 SAMPLES EVENTS RES 38.54 31.12\n', From 86f67b22ebd58b5582f1fa64bbc0db73da5c1161 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:29:33 +0100 Subject: [PATCH 07/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes --- src/pymovements/utils/parsing.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 14f9e8b4..3710df1d 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -92,12 +92,9 @@ r'(?P\d+)\s+' r'(?P0|1|2)\s+' r'(?P0|1|2)\s+' - r'(?PLR|[LR])\s*', #Todo retun to initial state of the regex + r'(?PLR|[LR])\s*', ) -RECORDING_CONFIG_new = re.compile(r'MSG\s+(?P\d+[.]?\d?)\s+RECCFG\s+(?P[A-Z,a-z]+)\s+(?P\d+)\s+(?P0|1|2)\s+(?P0|1|2)\s+(?PLR|[RL])\s*') - - def check_nan(sample_location: str) -> float: """Return position as float or np.nan depending on validity of sample. @@ -233,11 +230,11 @@ def parse_eyelink( with open(filepath, encoding='ascii') as asc_file: lines = asc_file.readlines() - print(" ascifile lines", lines) + # will return an empty string if the key does not exist metadata: defaultdict = defaultdict(str) - print("metadat initial: ", metadata) + # metadata keys specified by the user should have a default value of None metadata_keys = get_pattern_keys(compiled_metadata_patterns, 'key') for key in metadata_keys: @@ -259,7 +256,6 @@ def parse_eyelink( num_blink_samples = 0 for line in lines: - print("line in asci file: ", line) for pattern_dict in compiled_patterns: @@ -287,12 +283,10 @@ def parse_eyelink( cal_timestamp = '' elif BLINK_START_REGEX.match(line): - print('blink start') blink = True elif match := BLINK_STOP_REGEX.match(line): blink = False - print('blink end') parsed_blink = match.groupdict() blink_info = { 'start_timestamp': float(parsed_blink['timestamp_start']), @@ -302,10 +296,9 @@ def parse_eyelink( } num_blink_samples = 0 blinks.append(blink_info) - print('blinks: ', blinks) + elif eye_side_match := RECORDING_CONFIG.match(line): - print("i matched recording config") recording_config.append(eye_side_match.groupdict()) elif match := START_RECORDING_REGEX.match(line): @@ -360,14 +353,7 @@ def parse_eyelink( metadata.update(match.groupdict()) # each metadata pattern should only match once - #print("comqiled metadata patterns before removing: ", compiled_metadata_patterns) compiled_metadata_patterns.remove(pattern_dict) - #print("comqiled metadata patternsafter removing : ", compiled_metadata_patterns) - - - print("recording config:", recording_config, "Regex: ", RECORDING_CONFIG) - print("blinks:", blinks, "Regex: ", BLINK_STOP_REGEX) - print("metadata: ", metadata) # if the sampling rate is not found, we cannot calculate the data loss @@ -388,6 +374,7 @@ def parse_eyelink( ) if not metadata: raise Warning('No metadata found. Please check the file for errors.') + pre_processed_metadata: dict[str, Any] = _pre_process_metadata(metadata) # is not yet pre-processed but should be pre_processed_metadata['calibrations'] = calibrations @@ -397,7 +384,7 @@ def parse_eyelink( pre_processed_metadata['data_loss_ratio_blinks'] = data_loss_ratio_blinks pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration pre_processed_metadata['recording_config'] = recording_config - print("pre_processed_metadata: ", pre_processed_metadata) + if not metadata: raise Warning('No metadata found. Please check the file for errors.') schema_overrides = { @@ -463,7 +450,7 @@ def _calculate_data_loss( invalid_samples: list[str], actual_num_samples: int, total_rec_duration: float, - sampling_rate: float, + sampling_rate: float | None, ) -> tuple[float | str, float | str]: """Calculate data loss and blink loss. @@ -486,7 +473,6 @@ def _calculate_data_loss( Data loss ratio and blink loss ratio. """ if not sampling_rate or not total_rec_duration: - print(f'samlpingrate {sampling_rate}, total_rec_tim:', total_rec_duration) return 'unknown', 'unknown' dl_ratio_blinks = 0.0 From 8542fc1ee0f1d0f495656f704bfe2e33c0a9b6ed Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:30:52 +0100 Subject: [PATCH 08/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes --- tests/unit/utils/parsing_test.py | 121 +++++++++++++++++++------------ 1 file changed, 73 insertions(+), 48 deletions(-) diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index ff8fe7f8..45fe8cd9 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -1,3 +1,23 @@ +# Copyright (c) 2024 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + # Copyright (c) 2023-2024 The pymovements Project Authors # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -9,7 +29,7 @@ # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -#git +# git # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -186,21 +206,25 @@ 'metadata_2': 'abc', 'metadata_3': True, 'metadata_4': None, - 'recording_config': [{ - 'sampling_rate': '1000', # MSG 2154555 RECCFG CR 1000 2 1 L - 'file_sample_filter': '2', - 'link_sample_filter': '1', - 'timestamp': '2154555', - 'tracked_eye': 'L', - 'tracking_mode': 'CR', - }, - {'sampling_rate': '2000', # MSG 10000013.5 RECCFG CR 2000 1 1 R - 'file_sample_filter': '1', - 'link_sample_filter': '1', - 'timestamp': '10000013.5', - 'tracked_eye': 'R', - 'tracking_mode': 'CR', - }]} + 'recording_config': [ + { + 'sampling_rate': '1000', # MSG 2154555 RECCFG CR 1000 2 1 L + 'file_sample_filter': '2', + 'link_sample_filter': '1', + 'timestamp': '2154555', + 'tracked_eye': 'L', + 'tracking_mode': 'CR', + }, + { + 'sampling_rate': '2000', # MSG 10000013.5 RECCFG CR 2000 1 1 R + 'file_sample_filter': '1', + 'link_sample_filter': '1', + 'timestamp': '10000013.5', + 'tracked_eye': 'R', + 'tracking_mode': 'CR', + }, + ], +} def test_parse_eyelink(tmp_path): @@ -412,7 +436,7 @@ def test_val_cal_eyelink(tmp_path, metadata, expected_validation, expected_calib def test_parse_val_cal_eyelink_monocular_file(): example_asc_monocular_path = Path(__file__).parent.parent.parent / \ - 'files/eyelink_monocular_example.asc' + 'files/eyelink_monocular_example.asc' _, metadata = pm.utils.parsing.parse_eyelink(example_asc_monocular_path) @@ -435,22 +459,23 @@ def test_parse_val_cal_eyelink_monocular_file(): @pytest.mark.parametrize( ('metadata', 'expected_blinks'), [ - pytest.param('MSG 2154555 RECCFG CR 1000 2 1 L\n' - '** DATE: Wed Mar 8 09:25:20 2023\n' - 'MSG 2154555 RECCFG CR 1000 2 1 L\n' - 'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n' - 'SBLINK R 10000018\n' - '10000019 . . 0.0 0.0 ...\n' - '10000020 . . 0.0 0.0 ...\n' - 'EBLINK R 10000018 10000020 2\n', - [{ - 'duration_ms': 2, - 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 10000020, - }], - id='blink', - ), + pytest.param( + 'MSG 2154555 RECCFG CR 1000 2 1 L\n' + '** DATE: Wed Mar 8 09:25:20 2023\n' + 'MSG 2154555 RECCFG CR 1000 2 1 L\n' + 'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n' + 'SBLINK R 10000018\n' + '10000019 . . 0.0 0.0 ...\n' + '10000020 . . 0.0 0.0 ...\n' + 'EBLINK R 10000018 10000020 2\n', + [{ + 'duration_ms': 2, + 'num_samples': 2, + 'start_timestamp': 10000018, + 'stop_timestamp': 10000020, + }], + id='blink', + ), pytest.param( '** DATE: Wed Mar 8 09:25:20 2023\n' 'EVENTS GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2\n' @@ -508,19 +533,19 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): @pytest.mark.parametrize( ('metadata', 'expected_blink_ratio', 'expected_overall_ratio'), [ - pytest.param( '** DATE: Wed Mar 8 09:25:20 2023\n' - 'MSG 2154555 RECCFG CR 1000 2 1 L\n' - 'START 10000018 RIGHT SAMPLES EVENTS\n' - 'SBLINK R 10000018\n' - '10000019 . . 0.0 0.0 ...\n' - '10000020 . . 0.0 0.0 ...\n' - 'EBLINK R 10000018 10000020 2\n' - 'END 10000020 SAMPLES EVENTS RES 38.54 31.12\n', - # asc snipped which gets processed by the function - 1, # expected_blink_ratio - 1, # expected_overall_ratio - id='only_blinks', - ), + pytest.param('** DATE: Wed Mar 8 09:25:20 2023\n' + 'MSG 2154555 RECCFG CR 1000 2 1 L\n' + 'START 10000018 RIGHT SAMPLES EVENTS\n' + 'SBLINK R 10000018\n' + '10000019 . . 0.0 0.0 ...\n' + '10000020 . . 0.0 0.0 ...\n' + 'EBLINK R 10000018 10000020 2\n' + 'END 10000020 SAMPLES EVENTS RES 38.54 31.12\n', + # asc snipped which gets processed by the function + 1, # expected_blink_ratio + 1, # expected_overall_ratio + id='only_blinks', + ), pytest.param( '** DATE: Wed Mar 8 09:25:20 2023\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' @@ -579,7 +604,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): id='missing_timestamps', ), pytest.param( - '** DATE: Wed Mar 8 09:25:20 2023\n' + '** DATE: Wed Mar 8 09:25:20 2023\n' 'MSG 2154555 RECCFG CR 1000 2 1 L\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' '10000019 850.7 717.5 714.0 0.0 ...\n' @@ -635,7 +660,7 @@ def test_parse_eyelink_data_loss_ratio( filepath.write_text(metadata) _, parsed_metadata = pm.utils.parsing.parse_eyelink(filepath) - print(f"parsed_metadata: {parsed_metadata}, \nmetadata: {metadata}") + print(f'parsed_metadata: {parsed_metadata}, \nmetadata: {metadata}') assert parsed_metadata['data_loss_ratio_blinks'] == expected_blink_ratio assert parsed_metadata['data_loss_ratio'] == expected_overall_ratio From 95f6b3e9de1e726c6f4cb417a5dbdd59c022f1f5 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:45:36 +0100 Subject: [PATCH 09/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes --- src/pymovements/utils/parsing.py | 13 ++++++++----- tests/unit/utils/parsing_test.py | 28 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 3710df1d..26234755 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -359,18 +359,15 @@ def parse_eyelink( # if the sampling rate is not found, we cannot calculate the data loss actual_number_of_samples = len(samples['time']) # if we don't have any recording config, we cannot calculate the data loss - if not recording_config: - actual_sampling_rate = None + sampling_rate = _check_sampling_rate(recording_config) - else: - actual_sampling_rate = float(recording_config[0]['sampling_rate']) data_loss_ratio, data_loss_ratio_blinks = _calculate_data_loss( blinks=blinks, invalid_samples=invalid_samples, actual_num_samples=actual_number_of_samples, total_rec_duration=total_recording_duration, - sampling_rate=actual_sampling_rate, + sampling_rate=sampling_rate, ) if not metadata: raise Warning('No metadata found. Please check the file for errors.') @@ -444,6 +441,12 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]: return return_metadata +def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None: + if not recording_config: + sampling_rate = None + else: + sampling_rate = float(recording_config[0]['sampling_rate']) + return sampling_rate def _calculate_data_loss( blinks: list[dict[str, Any]], diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index 45fe8cd9..c4e5c9d8 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -17,7 +17,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - # Copyright (c) 2023-2024 The pymovements Project Authors # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -533,19 +532,20 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): @pytest.mark.parametrize( ('metadata', 'expected_blink_ratio', 'expected_overall_ratio'), [ - pytest.param('** DATE: Wed Mar 8 09:25:20 2023\n' - 'MSG 2154555 RECCFG CR 1000 2 1 L\n' - 'START 10000018 RIGHT SAMPLES EVENTS\n' - 'SBLINK R 10000018\n' - '10000019 . . 0.0 0.0 ...\n' - '10000020 . . 0.0 0.0 ...\n' - 'EBLINK R 10000018 10000020 2\n' - 'END 10000020 SAMPLES EVENTS RES 38.54 31.12\n', - # asc snipped which gets processed by the function - 1, # expected_blink_ratio - 1, # expected_overall_ratio - id='only_blinks', - ), + pytest.param( + '** DATE: Wed Mar 8 09:25:20 2023\n' + 'MSG 2154555 RECCFG CR 1000 2 1 L\n' + 'START 10000018 RIGHT SAMPLES EVENTS\n' + 'SBLINK R 10000018\n' + '10000019 . . 0.0 0.0 ...\n' + '10000020 . . 0.0 0.0 ...\n' + 'EBLINK R 10000018 10000020 2\n' + 'END 10000020 SAMPLES EVENTS RES 38.54 31.12\n', + # asc snipped which gets processed by the function + 1, # expected_blink_ratio + 1, # expected_overall_ratio + id='only_blinks', + ), pytest.param( '** DATE: Wed Mar 8 09:25:20 2023\n' 'START 10000018 RIGHT SAMPLES EVENTS\n' From ad168c8d65f9acd03ba633d5ade079aeb048d702 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:49:33 +0100 Subject: [PATCH 10/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes --- src/pymovements/utils/parsing.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 26234755..5417f3ab 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -382,8 +382,6 @@ def parse_eyelink( pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration pre_processed_metadata['recording_config'] = recording_config - if not metadata: - raise Warning('No metadata found. Please check the file for errors.') schema_overrides = { 'time': pl.Float64, 'x_pix': pl.Float64, @@ -442,6 +440,17 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]: return return_metadata def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None: + """Check if the sampling rate is available in the recording config. + Parameters + ---------- + recording_config : list[dict[str, Any]] + List of dictionaries containing recording configuration details. + + Returns + ------- + float | None + The sampling rate as a float if available, otherwise None. + """ if not recording_config: sampling_rate = None else: From ec9f99d90720ac8d6ad8d75817fe13fdaf18e21d Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:53:34 +0100 Subject: [PATCH 11/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes --- src/pymovements/utils/parsing.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 5417f3ab..bfc194a0 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -95,6 +95,7 @@ r'(?PLR|[LR])\s*', ) + def check_nan(sample_location: str) -> float: """Return position as float or np.nan depending on validity of sample. @@ -231,7 +232,6 @@ def parse_eyelink( with open(filepath, encoding='ascii') as asc_file: lines = asc_file.readlines() - # will return an empty string if the key does not exist metadata: defaultdict = defaultdict(str) @@ -258,7 +258,6 @@ def parse_eyelink( for line in lines: for pattern_dict in compiled_patterns: - if match := pattern_dict['pattern'].match(line): if 'value' in pattern_dict: current_column = pattern_dict['column'] @@ -297,7 +296,6 @@ def parse_eyelink( num_blink_samples = 0 blinks.append(blink_info) - elif eye_side_match := RECORDING_CONFIG.match(line): recording_config.append(eye_side_match.groupdict()) @@ -310,7 +308,6 @@ def parse_eyelink( total_recording_duration += block_duration - elif eye_tracking_sample_match := EYE_TRACKING_SAMPLE.match(line): timestamp_s = eye_tracking_sample_match.group('time') @@ -355,13 +352,11 @@ def parse_eyelink( # each metadata pattern should only match once compiled_metadata_patterns.remove(pattern_dict) - # if the sampling rate is not found, we cannot calculate the data loss actual_number_of_samples = len(samples['time']) # if we don't have any recording config, we cannot calculate the data loss sampling_rate = _check_sampling_rate(recording_config) - data_loss_ratio, data_loss_ratio_blinks = _calculate_data_loss( blinks=blinks, invalid_samples=invalid_samples, @@ -419,8 +414,6 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]: resolution = (coordinates[2] - coordinates[0] + 1, coordinates[3] - coordinates[1] + 1) metadata['resolution'] = resolution - - # if the date has been parsed fully, convert the date to a datetime object if 'day' in metadata and 'year' in metadata and 'month' in metadata and 'time' in metadata: metadata['day'] = int(metadata['day']) @@ -439,8 +432,10 @@ def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]: return return_metadata + def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None: """Check if the sampling rate is available in the recording config. + Parameters ---------- recording_config : list[dict[str, Any]] @@ -457,6 +452,7 @@ def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None sampling_rate = float(recording_config[0]['sampling_rate']) return sampling_rate + def _calculate_data_loss( blinks: list[dict[str, Any]], invalid_samples: list[str], From 18414fbac2b990a604f9c666f9dc3c339bd727d6 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:56:43 +0100 Subject: [PATCH 12/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes, modified parsing.py to adhere to pylint --- src/pymovements/utils/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index bfc194a0..9b039747 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -458,7 +458,7 @@ def _calculate_data_loss( invalid_samples: list[str], actual_num_samples: int, total_rec_duration: float, - sampling_rate: float | None, + sampling_rate: float | None = None, ) -> tuple[float | str, float | str]: """Calculate data loss and blink loss. From 8d1a7e68b980f1d09103e0ed9d01e3e305115bde Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:59:19 +0100 Subject: [PATCH 13/21] changed _calculate_data_loss sampling_rate variable, to reflect the changes in the metadata dict, modified parsing_test.py to account for the changes, modified parsing.py to adhere to pylint --- src/pymovements/utils/parsing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 9b039747..185913c5 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -457,7 +457,7 @@ def _calculate_data_loss( blinks: list[dict[str, Any]], invalid_samples: list[str], actual_num_samples: int, - total_rec_duration: float, + total_rec_duration: float | None = None, sampling_rate: float | None = None, ) -> tuple[float | str, float | str]: """Calculate data loss and blink loss. @@ -470,9 +470,9 @@ def _calculate_data_loss( List of invalid samples. actual_num_samples: int Number of actual samples recorded. - total_rec_duration: float + total_rec_duration: float | None Total duration of the recording. - sampling_rate: float + sampling_rate: float | None Sampling rate of the eye tracker. Returns From 8dafbee0b2ffb28b857768d34028ef338e92f2cf Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:43:02 +0100 Subject: [PATCH 14/21] adapted io.py to pass tests --- src/pymovements/gaze/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py index 18868e3d..42bd551d 100644 --- a/src/pymovements/gaze/io.py +++ b/src/pymovements/gaze/io.py @@ -332,8 +332,8 @@ def from_asc( │ 2339290 ┆ 618.0 ┆ [637.6, 531.4] │ │ 2339291 ┆ 618.0 ┆ [637.3, 531.2] │ └─────────┴───────┴────────────────┘ - >>> metadata['sampling_rate'] - 1000.0 + >>> metadata["recording_config"][0]['sampling_rate'] + '1000.0' """ if isinstance(patterns, str): if patterns == 'eyelink': From 0267ad14eafab27026a8537ff69df013d14dfb20 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:01:02 +0100 Subject: [PATCH 15/21] adapted io.py to pass tests --- src/pymovements/gaze/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py index 42bd551d..ad2a9e3f 100644 --- a/src/pymovements/gaze/io.py +++ b/src/pymovements/gaze/io.py @@ -332,8 +332,8 @@ def from_asc( │ 2339290 ┆ 618.0 ┆ [637.6, 531.4] │ │ 2339291 ┆ 618.0 ┆ [637.3, 531.2] │ └─────────┴───────┴────────────────┘ - >>> metadata["recording_config"][0]['sampling_rate'] - '1000.0' + >>> float(metadata["recording_config"][0]['sampling_rate']) + 1000.0 """ if isinstance(patterns, str): if patterns == 'eyelink': From 253d9f2439ad790a0cc75ae1355c8c53104bab62 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 20 Nov 2024 13:26:18 +0100 Subject: [PATCH 16/21] modified parsing_test.py according to comments --- tests/unit/utils/parsing_test.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index c4e5c9d8..9a34fee2 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -1,22 +1,3 @@ -# Copyright (c) 2024 The pymovements Project Authors -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. # Copyright (c) 2023-2024 The pymovements Project Authors # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -660,7 +641,6 @@ def test_parse_eyelink_data_loss_ratio( filepath.write_text(metadata) _, parsed_metadata = pm.utils.parsing.parse_eyelink(filepath) - print(f'parsed_metadata: {parsed_metadata}, \nmetadata: {metadata}') assert parsed_metadata['data_loss_ratio_blinks'] == expected_blink_ratio assert parsed_metadata['data_loss_ratio'] == expected_overall_ratio From 67d07decd4bc80b4f3026d6e4b15532531090817 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 20 Nov 2024 13:41:39 +0100 Subject: [PATCH 17/21] added consistency check for sampling rate --- src/pymovements/utils/parsing.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 185913c5..c3be2b8d 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -444,12 +444,17 @@ def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None Returns ------- float | None - The sampling rate as a float if available, otherwise None. + The sampling rate of the first entry as a float if available, otherwise None. + Raises a warning if no recording configuration is found or if the sampling rate is inconsistent. """ if not recording_config: sampling_rate = None + raise Warning('No recording configuration found. Cannot calculate data loss.') else: - sampling_rate = float(recording_config[0]['sampling_rate']) + sampling_rates = {d.get('sampling_rate') for d in recording_config} + if len(sampling_rates) != 1: + raise Warning('Inconsistent sampling rates found. The first recorded sampling rate is used to calculate the dataloss.') + sampling_rate = float(sampling_rates.pop()) return sampling_rate From 034c5abaac649fc779974e3007883886110bdbda Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:02:54 +0100 Subject: [PATCH 18/21] changed inconsistency_check to include a print statement. Probably not the cleanest way to do it --- src/pymovements/utils/parsing.py | 8 +++++--- tests/unit/utils/parsing_test.py | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index c3be2b8d..cec6e913 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -445,15 +445,17 @@ def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None ------- float | None The sampling rate of the first entry as a float if available, otherwise None. - Raises a warning if no recording configuration is found or if the sampling rate is inconsistent. + Raises a warning if no recording configuration is found or + if the sampling rate is inconsistent. """ if not recording_config: sampling_rate = None - raise Warning('No recording configuration found. Cannot calculate data loss.') + print('Warning: No recording configuration found. Cannot calculate data loss.') else: sampling_rates = {d.get('sampling_rate') for d in recording_config} if len(sampling_rates) != 1: - raise Warning('Inconsistent sampling rates found. The first recorded sampling rate is used to calculate the dataloss.') + print('Warning: Inconsistent sampling rates found. The first recorded sampling ' + 'rate is used to calculate the dataloss.') sampling_rate = float(sampling_rates.pop()) return sampling_rate diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index 9a34fee2..08ecf1b5 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -1,3 +1,23 @@ +# Copyright (c) 2024 The pymovements Project Authors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + # Copyright (c) 2023-2024 The pymovements Project Authors # # Permission is hereby granted, free of charge, to any person obtaining a copy From 25a86932f685f6a0582288084b457a4609d483fc Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:07:35 +0100 Subject: [PATCH 19/21] changed inconsistency_check to include a print statement. Probably not the cleanest way to do it --- src/pymovements/utils/parsing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index cec6e913..8691d57c 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -454,9 +454,12 @@ def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None else: sampling_rates = {d.get('sampling_rate') for d in recording_config} if len(sampling_rates) != 1: - print('Warning: Inconsistent sampling rates found. The first recorded sampling ' - 'rate is used to calculate the dataloss.') - sampling_rate = float(sampling_rates.pop()) + print( + 'Warning: Inconsistent sampling rates found. The first recorded sampling ' + 'rate is used to calculate the dataloss.', + ) + sampling_rate = sampling_rates.pop() + sampling_rate = float(sampling_rate) return sampling_rate From 9acb9263341a8ce95a2d0e7804dea06a96414c22 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:07:39 +0100 Subject: [PATCH 20/21] changed inconsistency_check to include a print statement. Probably not the cleanest way to do it --- tests/unit/utils/parsing_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index 08ecf1b5..8e28aeab 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -17,7 +17,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - # Copyright (c) 2023-2024 The pymovements Project Authors # # Permission is hereby granted, free of charge, to any person obtaining a copy From 5e4c6591d6f7484582cd597e2f252d3ae5d1cd23 Mon Sep 17 00:00:00 2001 From: saphjra <49561526+saphjra@users.noreply.github.com> Date: Wed, 20 Nov 2024 14:20:33 +0100 Subject: [PATCH 21/21] changed inconsistency_check to include a print statement. Probably not the cleanest way to do it --- src/pymovements/utils/parsing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index 8691d57c..cc65e3ab 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -445,7 +445,7 @@ def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None ------- float | None The sampling rate of the first entry as a float if available, otherwise None. - Raises a warning if no recording configuration is found or + prints a warning if no recording configuration is found or if the sampling rate is inconsistent. """ if not recording_config: @@ -458,8 +458,7 @@ def _check_sampling_rate(recording_config: list[dict[str, Any]]) -> float | None 'Warning: Inconsistent sampling rates found. The first recorded sampling ' 'rate is used to calculate the dataloss.', ) - sampling_rate = sampling_rates.pop() - sampling_rate = float(sampling_rate) + sampling_rate = float(recording_config[0]['sampling_rate']) return sampling_rate