From 7465ba513149142db64c8f8c2260218ac8ff65e4 Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Thu, 20 Jun 2024 16:58:06 +0200 Subject: [PATCH 01/13] Tried to fix bugs, but failed. edf2asc suddenly gives out a random order for the calibration positions. E.g., calibration point 3 is listed after calibration point 4. That's why I had to change the function _extract_CalibrationPosition (as well as _get_calibration_positions). However, now the tests are failing because something is wrong with the nested lists, and it also tries to get the calibration positions for the test files which have none. I thought I specified that within the functions but obviously it doesn't work. --- eye2bids/edf2bids.py | 45 ++++++++++++++++++++---------------------- tests/test_edf2bids.py | 8 ++------ 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 40a243b..f1964a4 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -139,37 +139,34 @@ def _extract_CalibrationCount(df: pd.DataFrame) -> int: return len(_calibrations(df)) -def _get_calibration_positions(df: pd.DataFrame) -> list[int]: - if _2eyesmode(df) == True: - return ( - np.array(df[df[2] == "VALIDATE"][8].str.split(",", expand=True)) - .astype(int) - .tolist() - )[::2] - return ( - np.array(df[df[2] == "VALIDATE"][8].str.split(",", expand=True)) - .astype(int) - .tolist() - ) - - def _extract_CalibrationPosition(df: pd.DataFrame) -> list[list[int]]: - cal_pos = _get_calibration_positions(df) - cal_num = len(cal_pos) // _extract_CalibrationCount(df) + + if _has_validation == False: + CalibrationPosition = [] + return CalibrationPosition + + else: + cal_df = df[df[2] == "VALIDATE"]#.drop(columns=[2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17]).reset_index(drop=True) + cal_df[5] = pd.to_numeric(cal_df[5], errors='coerce') + df_sorted = cal_df.sort_values(by=5) - CalibrationPosition: list[list[int]] = [] + if _2eyesmode(df) == True: + df_sorted = 
df_sorted.drop(index=df_sorted.index[::(_extract_CalibrationCount(df) * 2)]) - if len(cal_pos) == 0: - return CalibrationPosition + if _extract_CalibrationCount(df) == 1: + CalibrationPosition = np.array((df_sorted[8]).str.split(",", expand=True)).astype(int).tolist() + return CalibrationPosition + else: + CalibrationPosition = [] - CalibrationPosition.extend( - cal_pos[i : i + cal_num] for i in range(0, len(cal_pos), cal_num) - ) - return CalibrationPosition + for x in df_sorted: + cal_values = np.array((df_sorted[8][::_extract_CalibrationCount(df)]).str.split(",", expand=True)).astype(int).tolist() + CalibrationPosition.append(cal_values) + return CalibrationPosition def _extract_CalibrationUnit(df: pd.DataFrame) -> str: - if len(_get_calibration_positions(df)) == 0: + if len(_extract_CalibrationPosition(df)) == 0: return "" cal_unit = ( diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index aa231b0..b697503 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -283,7 +283,6 @@ def test_extract_CalibrationUnit(folder, expected, eyelink_test_data_dir): ("pitracker", []), ( "rest", - [ [ [960, 540], [960, 732], @@ -298,14 +297,12 @@ def test_extract_CalibrationUnit(folder, expected, eyelink_test_data_dir): [1126, 636], [794, 444], [960, 348], - ] - ], + ], ), ("satf", []), ("vergence", []), ( "2eyes", - [ [ [960, 540], [960, 732], @@ -320,8 +317,7 @@ def test_extract_CalibrationUnit(folder, expected, eyelink_test_data_dir): [1126, 636], [794, 444], [960, 348], - ] - ], + ], ), ], ) From 554966eae5de5bd695af6464ae0ec4f67630a590 Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Mon, 1 Jul 2024 12:53:04 +0200 Subject: [PATCH 02/13] fix bug: make sure that EyeTrackingMethod is empty if no calibration because otherwise it writes wrong content into the variable --- eye2bids/edf2bids.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 5a2ccb0..f5757ed 100644 --- 
a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -127,6 +127,9 @@ def _2eyesmode(df: pd.DataFrame) -> bool: def _calibrations(df: pd.DataFrame) -> pd.DataFrame: return df[df[3] == "CALIBRATION"] +def _has_calibration(df: pd.DataFrame) -> bool: + return not _calibrations(df).empty + def _extract_CalibrationType(df: pd.DataFrame) -> list[int]: return _calibrations(df).iloc[0:1, 2:3].to_string(header=False, index=False) @@ -422,16 +425,19 @@ def generate_physio_json( base_json.input_file = input_file base_json.has_validation = _has_validation(df_ms_reduced) base_json.two_eyes = _2eyesmode(df_ms_reduced) + base_json.has_calibration = _has_calibration(df_ms_reduced) base_json["ManufacturersModelName"] = _extract_ManufacturersModelName(events) base_json["DeviceSerialNumber"] = _extract_DeviceSerialNumber(events) - base_json["EyeTrackingMethod"] = _extract_EyeTrackingMethod(events) base_json["PupilFitMethod"] = _extract_PupilFitMethod(df_ms_reduced) base_json["SamplingFrequency"] = _extract_SamplingFrequency(df_ms_reduced) base_json["StartTime"] = _extract_StartTime(events) base_json["StopTime"] = _extract_StopTime(events) + if base_json.has_calibration: + base_json["EyeTrackingMethod"] = _extract_EyeTrackingMethod(events) + if base_json.two_eyes: metadata_eye1: dict[str, str | list[str] | list[float]] = { "RecordedEye": (_extract_RecordedEye(df_ms_reduced)[0]), From 1c818fa2d28da05896e9f1d38a8ec5146c0506dc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 10:57:15 +0000 Subject: [PATCH 03/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- eye2bids/edf2bids.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index f5757ed..c34bfec 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -127,6 +127,7 @@ def _2eyesmode(df: pd.DataFrame) -> bool: def _calibrations(df: pd.DataFrame) -> 
pd.DataFrame: return df[df[3] == "CALIBRATION"] + + def _has_calibration(df: pd.DataFrame) -> bool: return not _calibrations(df).empty From deb2639f5e3ccb2e37d25db475ffed8b08a5a490 Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Mon, 1 Jul 2024 13:03:50 +0200 Subject: [PATCH 04/13] fix test: parentheses --- tests/test_edf2bids.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index e854d24..5ab308b 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -258,6 +258,7 @@ def test_extract_CalibrationUnit(folder, expected, eyelink_test_data_dir): ), ( "rest", + [ [ [960, 540], [960, 732], @@ -273,9 +274,11 @@ def test_extract_CalibrationUnit(folder, expected, eyelink_test_data_dir): [794, 444], [960, 348], ], + ] ), ( "2eyes", + [ [ [960, 540], [960, 732], From d28f29f44d87ca3ef7334401855d28c5bcc86667 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 11:04:57 +0000 Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_edf2bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index 5ab308b..0c1abae 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -274,7 +274,7 @@ def test_extract_CalibrationUnit(folder, expected, eyelink_test_data_dir): [794, 444], [960, 348], ], - ] + ], ), ( "2eyes", From e993afb1ea49ef0e4b7336b992ecaca691a11991 Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Tue, 2 Jul 2024 11:58:51 +0200 Subject: [PATCH 06/13] add PhysioType to metadata --- eye2bids/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eye2bids/_base.py b/eye2bids/_base.py index f32cd3b..9cea14d 100644 --- a/eye2bids/_base.py +++ b/eye2bids/_base.py @@ -82,6 +82,7 @@ class BasePhysioJson(dict[str, Any]): def __init__(self, manufacturer: str, metadata: dict[str, Any] 
| None = None) -> None: self["Manufacturer"] = manufacturer + self["PhysioType"] = "eyetrack" self["Columns"] = ["x_coordinate", "y_coordinate", "pupil_size", "timestamp"] self["timestamp"] = { From 278fe0037656e1e79a453bfbc5ac091ff255a580 Mon Sep 17 00:00:00 2001 From: Julia-Katharina Pfarr <111446107+julia-pfarr@users.noreply.github.com> Date: Fri, 5 Jul 2024 13:09:22 +0200 Subject: [PATCH 07/13] Update _base.py --- eye2bids/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eye2bids/_base.py b/eye2bids/_base.py index 9cea14d..4739855 100644 --- a/eye2bids/_base.py +++ b/eye2bids/_base.py @@ -78,6 +78,7 @@ class BasePhysioJson(dict[str, Any]): input_file: Path has_validation: bool two_eyes: bool + has_calibration: bool def __init__(self, manufacturer: str, metadata: dict[str, Any] | None = None) -> None: From 09356af839e4823e57d79865eea7071438d8a40d Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Wed, 4 Sep 2024 11:55:28 +0200 Subject: [PATCH 08/13] pre-commit fixes --- eye2bids/edf2bids.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 41797d5..334c007 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -277,11 +277,9 @@ def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]: def _extract_StartTime(events: list[str]) -> int: - StartTime = ( - np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) - .astype(int) - .tolist() - ) + StartTime = pd.DataFrame([st.split() for st in events if st.startswith("START")])[ + 1 + ].astype(int) if len(StartTime) > 1: e2b_log.info( """Your input file contains multiple start times.\n @@ -295,11 +293,9 @@ def _extract_StartTime(events: list[str]) -> int: def _extract_StopTime(events: list[str]) -> int: - StopTime = ( - np.array(pd.DataFrame([so.split() for so in events if so.startswith("END")])[1]) - .astype(int) - .tolist() - ) + StopTime = pd.DataFrame([so.split() for so in events if 
so.startswith("END")])[ + 1 + ].astype(int) if len(StopTime) > 1: e2b_log.info( """Your input file contains multiple stop times.\n From 95a7875bcc2651984e855ab2886660592dd0a22d Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Wed, 4 Sep 2024 13:17:41 +0200 Subject: [PATCH 09/13] #96: fix start and stop time to be numbers instead of arrays --- eye2bids/edf2bids.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 334c007..2a5aa2d 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -277,9 +277,11 @@ def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]: def _extract_StartTime(events: list[str]) -> int: - StartTime = pd.DataFrame([st.split() for st in events if st.startswith("START")])[ - 1 - ].astype(int) + StartTime = ( + np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) + .astype(int) + .tolist() + ) if len(StartTime) > 1: e2b_log.info( """Your input file contains multiple start times.\n @@ -288,14 +290,15 @@ def _extract_StartTime(events: list[str]) -> int: Please consider changing your code accordingly for future eyetracking experiments.\n""" ) - return StartTime[0] - return StartTime + return StartTime[0] def _extract_StopTime(events: list[str]) -> int: - StopTime = pd.DataFrame([so.split() for so in events if so.startswith("END")])[ - 1 - ].astype(int) + StopTime = ( + np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) + .astype(int) + .tolist() + ) if len(StopTime) > 1: e2b_log.info( """Your input file contains multiple stop times.\n @@ -304,8 +307,7 @@ def _extract_StopTime(events: list[str]) -> int: Please consider changing your code accordingly for future eyetracking experiments.\n""" ) - return StopTime[-1] - return StopTime + return StopTime[-1] def _load_asc_file(events_asc_file: str | Path) -> list[str]: From fa25865acb9e27708c8cbba9ef28849704138371 Mon Sep 17 00:00:00 2001 From: 
julia-pfarr Date: Tue, 19 Nov 2024 17:40:00 -0500 Subject: [PATCH 10/13] issue #102 --- eye2bids/edf2bids.py | 68 +++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 1aeee91..2a13cd5 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -328,8 +328,10 @@ def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame: return pd.DataFrame(df_ms.iloc[0:, 2:]) -def _df_events_after_start(events: list[str]) -> pd.DataFrame: - """Extract data between START and END messages.""" +def _df_events_from_first_start(events: list[str]) -> pd.DataFrame: + """Extract data starting from the first time START appears + and including last time END appears. + """ start_index = next( i for i, line in enumerate(events) if re.match(r"START\s+.*", line) ) @@ -338,33 +340,44 @@ def _df_events_after_start(events: list[str]) -> pd.DataFrame: ) if end_index > start_index: - data_lines = events[start_index + 1 : end_index] + data_lines = events[start_index : end_index + 1] return pd.DataFrame([line.strip().split("\t") for line in data_lines]) else: return e2b_log.warning("No 'END' found after the selected 'START'.") -def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: - events_after_start["Event_Letters"] = ( - events_after_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) +def _df_physioevents(events_from_start: pd.DataFrame) -> pd.DataFrame: + events_from_start["Event_Letters"] = ( + events_from_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) ) - events_after_start["Event_Numbers"] = events_after_start[0].str.extract(r"(\d+)") - events_after_start[["msg_timestamp", "message"]] = events_after_start[1].str.split( + events_from_start["Event_Numbers"] = events_from_start[0].str.extract(r"(\d+)") + events_from_start[["msg_timestamp", "message"]] = events_from_start[1].str.split( n=1, expand=True ) - 
events_after_start["message"] = events_after_start["message"].astype(str) + events_from_start["message"] = events_from_start["message"].astype(str) + + events_from_start["message"] = np.where( + events_from_start["Event_Letters"] == "START", + "START", + np.where( + events_from_start["Event_Letters"] == "END", + "END", + events_from_start.get("message", ""), + ), + ) - msg_mask = events_after_start["Event_Letters"] == "MSG" - events_after_start.loc[msg_mask, "Event_Numbers"] = events_after_start.loc[ + msg_mask = events_from_start["Event_Letters"].isin(["MSG", "START", "END"]) + events_from_start.loc[msg_mask, "Event_Numbers"] = events_from_start.loc[ msg_mask, "msg_timestamp" ] + physioevents_reordered = ( pd.concat( [ - events_after_start["Event_Numbers"], - events_after_start[2], - events_after_start["Event_Letters"], - events_after_start["message"], + events_from_start["Event_Numbers"], + events_from_start[2], + events_from_start["Event_Letters"], + events_from_start["message"], ], axis=1, ignore_index=True, @@ -378,14 +391,28 @@ def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: def _physioevents_for_eye( physioevents_reordered: pd.DataFrame, eye: str = "L" ) -> pd.DataFrame: - physioevents_eye_list = ["MSG", f"EFIX{eye}", f"ESACC{eye}", f"EBLINK{eye}"] + physioevents_eye_list = [ + "MSG", + f"EFIX{eye}", + f"ESACC{eye}", + f"EBLINK{eye}", + "START", + "END", + ] physioevents = physioevents_reordered[ physioevents_reordered["trial_type"].isin(physioevents_eye_list) ] - physioevents = physioevents.replace( - {f"EFIX{eye}": "fixation", f"ESACC{eye}": "saccade", "MSG": np.nan, None: np.nan} + physioevents["trial_type"] = physioevents["trial_type"].replace( + { + f"EFIX{eye}": "fixation", + f"ESACC{eye}": "saccade", + "MSG": np.nan, + "START": np.nan, + "END": np.nan, + None: np.nan, + } ) physioevents["blink"] = 0 @@ -406,6 +433,7 @@ def _physioevents_for_eye( physioevents = physioevents[physioevents.trial_type != f"EBLINK{eye}"] 
physioevents["timestamp"] = physioevents["timestamp"].astype("Int64") + physioevents["duration"] = pd.to_numeric(physioevents["duration"], errors="coerce") physioevents["duration"] = physioevents["duration"].astype("Int64") physioevents = physioevents[ @@ -610,8 +638,8 @@ def edf2bids( # %% # Messages and events to dataframes - events_after_start = _df_events_after_start(events) - physioevents_reordered = _df_physioevents(events_after_start) + events_from_start = _df_events_from_first_start(events) + physioevents_reordered = _df_physioevents(events_from_start) physioevents_eye1 = _physioevents_for_eye(physioevents_reordered, eye="L") physioevents_eye2 = _physioevents_for_eye(physioevents_reordered, eye="R") From 6ad8cb9b2d13f37c686e17581c9f7906545d92bf Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Mon, 20 Jan 2025 13:28:22 -0500 Subject: [PATCH 11/13] modify lines in test_physioevents_value. They changed because we are now capturing START and END messages and thus more lines in output. 
--- tests/test_edf2bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index bc02978..5d3e622 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -633,4 +633,4 @@ def test_physioevents_value(folder, expected, eyelink_test_data_dir): output_dir / f"{input_file.stem}_recording-eye1_physioevents.tsv.gz" ) physioevents = pd.read_csv(expected_eyetrackphysio_tsv, sep="\t", header=None) - assert physioevents.iloc[3:10, 2].tolist() == expected + assert physioevents.iloc[4:11, 2].tolist() == expected From c496e342bd1776cd3b6d6e2cac2084194c588773 Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Tue, 21 Jan 2025 17:39:04 -0500 Subject: [PATCH 12/13] fix #96 and values of calibration errors --- eye2bids/edf2bids.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 28550aa..9fb654b 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -221,11 +221,11 @@ def _has_validation(df: pd.DataFrame) -> bool: def _extract_MaximalCalibrationError(df: pd.DataFrame) -> list[float]: - return np.array(_validations(df)[[11]]).astype(float).tolist() + return ((_validations(df)[[11]]).astype(float)).to_numpy().tolist() def _extract_AverageCalibrationError(df: pd.DataFrame) -> list[float]: - return np.array(_validations(df)[[9]]).astype(float).tolist() + return ((_validations(df)[[9]]).astype(float)).to_numpy().tolist() def _extract_ManufacturersModelName(events: list[str]) -> str: @@ -276,11 +276,9 @@ def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]: def _extract_StartTime(events: list[str]) -> int: - StartTime = ( - np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) - .astype(int) - .tolist() - ) + StartTime = np.array( + pd.DataFrame([st.split() for st in events if st.startswith("START")])[1] + ).astype(int) if len(StartTime) > 1: e2b_log.info( """Your input file 
contains multiple start times.\n @@ -293,11 +291,9 @@ def _extract_StartTime(events: list[str]) -> int: def _extract_StopTime(events: list[str]) -> int: - StopTime = ( - np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) - .astype(int) - .tolist() - ) + StopTime = np.array( + pd.DataFrame([st.split() for st in events if st.startswith("START")])[1] + ).astype(int) if len(StopTime) > 1: e2b_log.info( """Your input file contains multiple stop times.\n From c6fc4cf8ed83654aba9077ed5627b9a983863ee7 Mon Sep 17 00:00:00 2001 From: julia-pfarr Date: Tue, 21 Jan 2025 17:59:39 -0500 Subject: [PATCH 13/13] fix value error calibration values --- eye2bids/edf2bids.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 9fb654b..2fbb29b 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -276,9 +276,11 @@ def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]: def _extract_StartTime(events: list[str]) -> int: - StartTime = np.array( - pd.DataFrame([st.split() for st in events if st.startswith("START")])[1] - ).astype(int) + StartTime = ( + (pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) + .astype(int) + .tolist() + ) if len(StartTime) > 1: e2b_log.info( """Your input file contains multiple start times.\n @@ -291,9 +293,11 @@ def _extract_StartTime(events: list[str]) -> int: def _extract_StopTime(events: list[str]) -> int: - StopTime = np.array( - pd.DataFrame([st.split() for st in events if st.startswith("START")])[1] - ).astype(int) + StopTime = ( + (pd.DataFrame([so.split() for so in events if so.startswith("END")])[1]) + .astype(int) + .tolist() + ) if len(StopTime) > 1: e2b_log.info( """Your input file contains multiple stop times.\n