From 53acf8b466c566b1d3d5484ef303e40d3f62648a Mon Sep 17 00:00:00 2001 From: Jakub Romanowski Date: Fri, 25 Oct 2024 15:26:06 +0200 Subject: [PATCH 1/2] Created a tutorial for extracting some quality measures from raw asc data --- .../tutorials/eyetracking_measures.ipynb | 413 ++++++++++++++++++ 1 file changed, 413 insertions(+) create mode 100644 docs/source/tutorials/eyetracking_measures.ipynb diff --git a/docs/source/tutorials/eyetracking_measures.ipynb b/docs/source/tutorials/eyetracking_measures.ipynb new file mode 100644 index 000000000..212704d71 --- /dev/null +++ b/docs/source/tutorials/eyetracking_measures.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e20f3277-92cf-43bd-bd9f-8e73d20e98bb", + "metadata": {}, + "source": [ + "# Quality measures tutorial\n", + "\n", + "This is a tutorial for creating quality measures from raw data (.asc). It will include the absolute values and percentage of data for different parameters (missing pupil data, eye-tracking data outside the experiment).\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9da5b864-1f8c-404d-b198-752b9dcbcbd4", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import pymovements as pm\n", + "import polars as pl\n", + "import csv" + ] + }, + { + "cell_type": "markdown", + "id": "90641553-c477-46d6-bb2b-22eeeaef6eb9", + "metadata": { + "tags": [] + }, + "source": [ + "After importing some basic libraries let's load an example eyetracking file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "133591f7-a070-4cba-8d23-3f8183e22a1a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (3_220_492, 7)\n", + "┌────────────┬───────┬──────────┬────────────┬──────────┬──────────┬─────────────────┐\n", + "│ time ┆ pupil ┆ practice ┆ screen ┆ trial ┆ activity ┆ pixel │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ bool ┆ str ┆ str ┆ str ┆ list[f64] │\n", + "╞════════════╪═══════╪══════════╪════════════╪══════════╪══════════╪═════════════════╡\n", + "│ 709376.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ [102.2, 101.4] │\n", + "│ 709376.5 ┆ 210.0 ┆ null ┆ null ┆ null ┆ null ┆ [104.1, 99.8] │\n", + "│ 709377.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ [99.1, 106.5] │\n", + "│ 709377.5 ┆ 204.0 ┆ null ┆ null ┆ null ┆ null ┆ [85.2, 103.3] │\n", + "│ 709378.0 ┆ 202.0 ┆ null ┆ null ┆ null ┆ null ┆ [98.2, 106.7] │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 4.014271e6 ┆ 345.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1192.4, 854.6] │\n", + "│ 4014271.5 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1193.9, 853.4] │\n", + "│ 4.014272e6 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1189.7, 856.1] │\n", + "│ 4014272.5 ┆ 337.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1197.2, 850.1] │\n", + "│ 4.014273e6 ┆ 343.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1192.9, 848.7] │\n", + "└────────────┴───────┴──────────┴────────────┴──────────┴──────────┴─────────────────┘\n" + ] + } + ], + "source": [ + "asc = \"ch1hr007.asc\"\n", + "\n", + "data, metadata = pm.gaze.from_asc(\n", + " asc,\n", + " patterns=[\n", + " r\"start_recording_(?P<trial>(?:PRACTICE_)?trial_\\d+)_(?P<screen>.+)\",\n", + " {\"pattern\": r\"stop_recording_\", \"column\": \"trial\", \"value\": None},\n", + " {\"pattern\": r\"stop_recording_\", \"column\": \"screen\", \"value\": None},\n", + " {\n", + " \"pattern\":
r\"start_recording_(?:PRACTICE_)?trial_\\d+_page_\\d+\",\n", + " \"column\": \"activity\",\n", + " \"value\": \"reading\",\n", + " },\n", + " {\n", + " \"pattern\": r\"start_recording_(?:PRACTICE_)?trial_\\d+_question_\\d+\",\n", + " \"column\": \"activity\",\n", + " \"value\": \"question\",\n", + " },\n", + " {\n", + " \"pattern\": r\"start_recording_(?:PRACTICE_)?trial_\\d+_(familiarity_rating_screen_\\d+|subject_difficulty_screen)\",\n", + " \"column\": \"activity\",\n", + " \"value\": \"rating\",\n", + " },\n", + " {\"pattern\": r\"stop_recording_\", \"column\": \"activity\", \"value\": None},\n", + " {\n", + " \"pattern\": r\"start_recording_PRACTICE_trial_\",\n", + " \"column\": \"practice\",\n", + " \"value\": True,\n", + " },\n", + " {\n", + " \"pattern\": r\"start_recording_trial_\",\n", + " \"column\": \"practice\",\n", + " \"value\": False,\n", + " },\n", + " {\"pattern\": r\"stop_recording_\", \"column\": \"practice\", \"value\": None},\n", + " ],\n", + ")\n", + "data.frame\n", + "\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "id": "7e3b6a23-efa2-497d-b6c1-14f8808ed914", + "metadata": {}, + "source": [ + "We will split the pixel column into two for x and y coordinates of the gaze." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "97c48a35-87a8-479a-80d1-8fa9ba3668a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3_220_492, 8)
timepupilpracticescreentrialactivitypixel_xpixel_y
f64f64boolstrstrstrf64f64
709376.0206.0nullnullnullnull102.2101.4
709376.5210.0nullnullnullnull104.199.8
709377.0206.0nullnullnullnull99.1106.5
709377.5204.0nullnullnullnull85.2103.3
709378.0202.0nullnullnullnull98.2106.7
4.014271e6345.0false"question_6""trial_10""question"1192.4854.6
4014271.5342.0false"question_6""trial_10""question"1193.9853.4
4.014272e6342.0false"question_6""trial_10""question"1189.7856.1
4014272.5337.0false"question_6""trial_10""question"1197.2850.1
4.014273e6343.0false"question_6""trial_10""question"1192.9848.7
" + ], + "text/plain": [ + "shape: (3_220_492, 8)\n", + "┌────────────┬───────┬──────────┬────────────┬──────────┬──────────┬─────────┬─────────┐\n", + "│ time ┆ pupil ┆ practice ┆ screen ┆ trial ┆ activity ┆ pixel_x ┆ pixel_y │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ f64 ┆ bool ┆ str ┆ str ┆ str ┆ f64 ┆ f64 │\n", + "╞════════════╪═══════╪══════════╪════════════╪══════════╪══════════╪═════════╪═════════╡\n", + "│ 709376.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ 102.2 ┆ 101.4 │\n", + "│ 709376.5 ┆ 210.0 ┆ null ┆ null ┆ null ┆ null ┆ 104.1 ┆ 99.8 │\n", + "│ 709377.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ 99.1 ┆ 106.5 │\n", + "│ 709377.5 ┆ 204.0 ┆ null ┆ null ┆ null ┆ null ┆ 85.2 ┆ 103.3 │\n", + "│ 709378.0 ┆ 202.0 ┆ null ┆ null ┆ null ┆ null ┆ 98.2 ┆ 106.7 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 4.014271e6 ┆ 345.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1192.4 ┆ 854.6 │\n", + "│ 4014271.5 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1193.9 ┆ 853.4 │\n", + "│ 4.014272e6 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1189.7 ┆ 856.1 │\n", + "│ 4014272.5 ┆ 337.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1197.2 ┆ 850.1 │\n", + "│ 4.014273e6 ┆ 343.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1192.9 ┆ 848.7 │\n", + "└────────────┴───────┴──────────┴────────────┴──────────┴──────────┴─────────┴─────────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Unnest the 'pixel' column\n", + "\n", + "data.frame = data.frame.select(\n", + " [\n", + " pl.all().exclude(\"pixel\"),\n", + " pl.col(\"pixel\").list.get(0).alias(\"pixel_x\"),\n", + " pl.col(\"pixel\").list.get(1).alias(\"pixel_y\"),\n", + " ]\n", + ")\n", + "data.frame" + ] + }, + { + "cell_type": "markdown", + "id": "29703848-c884-4ef0-9049-46b5a08d7080", + "metadata": {}, + "source": [ + "# Extracting quality measures" + ] + }, + { + "cell_type": "markdown", + "id": 
"6fe936fb-b2a4-4d98-89b7-b12ec215c150", + "metadata": {}, + "source": [ + "The following function is meant to check if the sampling rate of the eyetracker ever deviated from the expected value.\n", + "It checks if consecutive timepoints ever differ by more than the value of the expected_diff argument. We're checking only the rows when a task definied by activity_id is performed.\n", + "An eyetracker with a constant refresh rate should return 0 skipped_time_absolute and a 0 skipped_time_ratio.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0374bd91-0424-490f-881f-83b0fa292caa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 0.0)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## function calculating for skipped time\n", + "\n", + "def time_loss(df: pl.DataFrame, task_column: str = 'activity', activity_id: str = 'page', target_column: str = 'time', expected_diff: float = 0.5, tolerance: float = 1e-7):\n", + " # Filter the DataFrame for rows where the 'activity' column contains the word 'page'\n", + " filtered_df = df.filter(pl.col(task_column).str.contains(activity_id))\n", + " # Calculate the difference between consecutive rows\n", + " differences = filtered_df[target_column].diff().drop_nulls()\n", + " # Store difference between timestep and expected_diff, where the difference is signifficant\n", + " large_differences = differences.filter(differences > expected_diff) - expected_diff\n", + " # total skipped time\n", + " skipped_time_absolute = sum(large_differences)\n", + " # ratio of skipped time to experiment duration\n", + " total_duration = (df[target_column][len(df)-1] - df[target_column][0])\n", + " skipped_time_ratio = (skipped_time_absolute / total_duration)\n", + " return skipped_time_absolute, skipped_time_ratio\n", + " \n", + "time_loss(data.frame)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": 
"88bbc1a7-e7af-45f2-9c9c-f447f4c11531", + "metadata": {}, + "outputs": [], + "source": [ + "def missing_pupil(df, sampling_rate, pupil_col):\n", + " miss_pupil_tuple = df[pupil_col].value_counts().row(by_predicate=(pl.col(pupil_col)==0.0))\n", + " abs_miss_pupil = miss_pupil_tuple[1] / sampling_rate\n", + " per_miss_pupil = miss_pupil_tuple[1]/(df.height)\n", + " return per_miss_pupil, abs_miss_pupil" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "2bd80359-95d4-4b8c-92c8-47835ae94aa4", + "metadata": {}, + "outputs": [], + "source": [ + "def missing_gaze(df, sampling_rate, gaze_x_col):\n", + " abs_miss_gaze_x = data.frame.select(pl.col(gaze_x_col).is_null().sum()).item()\n", + " per_miss_gaze_x = abs_miss_gaze_x /(df.height)\n", + " abs_miss_gaze_x / sampling_rate\n", + " return per_miss_gaze_x, abs_miss_gaze_x" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "2b78a3d2-d40f-4b85-9f5d-2a3dce9265b2", + "metadata": {}, + "outputs": [], + "source": [ + "def off_task_time(df, sampling_rate, data_col):\n", + " null_values_tab = df.null_count()\n", + " abs_miss_screen = null_values_tab[data_col][0]\n", + " per_miss_screen = abs_miss_screen / (df.height)\n", + " return abs_miss_screen/1000, per_miss_screen" + ] + }, + { + "cell_type": "markdown", + "id": "3b60c1f7-8396-45ab-9224-92e3019c16b0", + "metadata": {}, + "source": [ + "The following function gets the information about validation, specifically average value of all of the validations and maximal value of all of the validations. It is called in the next get_qual_check function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c867d088-14cd-404e-bbc9-2174e46d45c1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_validation_data(validations):\n", + " sum_average = 0.0\n", + " max_values = []\n", + " for validation in validations:\n", + " sum_average += float(validation['validation_score_avg'])\n", + " max_values.append(float(validation['validation_score_max']))\n", + " average_average = sum_average / len(validations)\n", + " global_max = max(max_values)\n", + " return average_average, global_max \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "8d9325c0-63ea-4249-a70e-5bd4a97a4ec5", + "metadata": {}, + "source": [ + "The following function extracts certain signifficant quality measures from the metadata and makes use of the functions above to calculate its own measures. The result is saved as a csv file." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "fb241efc-5e08-4f3e-9bff-c389213633c0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'sampling_rate': 2000.0, 'data_loss_ratio': 0.04547720970203655, 'data_loss_ratio_blinks': 0.044510408788512146, 'total_recording_duration_sec': 1610.466, 'missing_pupil_ratio': 0.0453467979426746, 'missing_pupil_sec': 73.0195, 'missing_gaze_ratio': 0.0453467979426746, 'missing_gaze_sec': 146039, 'off_task_time_sec': 1016.955, 'off_task_time_ratio': 0.3157762851141999, 'average_validation_score': 0.42266666666666663, 'global_max_validation_score': 1.89, 'time_loss_sec': 0, 'time_loss_ratio': 0.0, 'PRACTICE_trial_1_null_ratio': 0.0795467070128153, 'PRACTICE_trial_1_average_validation': '0.41', 'PRACTICE_trial_1_max_validation': '0.63', 'PRACTICE_trial_1_error': 'GOOD ERROR', 'PRACTICE_trial_1_tracked_eye': 'RIGHT', 'PRACTICE_trial_2_null_ratio': 0.07295313856754715, 'PRACTICE_trial_2_average_validation': '1.44', 'PRACTICE_trial_2_max_validation': '1.89', 'PRACTICE_trial_2_error': 'FAIR ERROR', 
'PRACTICE_trial_2_tracked_eye': 'RIGHT', 'trial_1_null_ratio': 0.0659771826394897, 'trial_1_average_validation': '0.46', 'trial_1_max_validation': '0.91', 'trial_1_error': 'GOOD ERROR', 'trial_1_tracked_eye': 'RIGHT', 'trial_2_null_ratio': 0.05499177626232743, 'trial_2_average_validation': '0.18', 'trial_2_max_validation': '0.43', 'trial_2_error': 'GOOD ERROR', 'trial_2_tracked_eye': 'RIGHT', 'trial_3_null_ratio': 0.050450814192191884, 'trial_3_average_validation': '0.26', 'trial_3_max_validation': '0.63', 'trial_3_error': 'GOOD ERROR', 'trial_3_tracked_eye': 'RIGHT', 'trial_4_null_ratio': 0.05847555828958095, 'trial_4_average_validation': '0.26', 'trial_4_max_validation': '0.36', 'trial_4_error': 'GOOD ERROR', 'trial_4_tracked_eye': 'RIGHT', 'trial_5_null_ratio': 0.10752111673790876, 'trial_5_average_validation': '0.32', 'trial_5_max_validation': '0.41', 'trial_5_error': 'GOOD ERROR', 'trial_5_tracked_eye': 'RIGHT', 'trial_6_null_ratio': 0.03940737476936629, 'trial_6_average_validation': '0.44', 'trial_6_max_validation': '0.57', 'trial_6_error': 'GOOD ERROR', 'trial_6_tracked_eye': 'RIGHT', 'trial_7_null_ratio': 0.052679978980556985, 'trial_7_average_validation': '0.27', 'trial_7_max_validation': '0.44', 'trial_7_error': 'GOOD ERROR', 'trial_7_tracked_eye': 'RIGHT', 'trial_8_null_ratio': 0.05433123214813351, 'trial_8_average_validation': '0.20', 'trial_8_max_validation': '0.49', 'trial_8_error': 'GOOD ERROR', 'trial_8_tracked_eye': 'RIGHT', 'trial_9_null_ratio': 0.06259078730753087, 'trial_9_average_validation': '0.36', 'trial_9_max_validation': '0.64', 'trial_9_error': 'GOOD ERROR', 'trial_9_tracked_eye': 'RIGHT', 'trial_10_null_ratio': 0.0697597766446284, 'trial_10_average_validation': '0.28', 'trial_10_max_validation': '0.36', 'trial_10_error': 'GOOD ERROR', 'trial_10_tracked_eye': 'RIGHT'}\n" + ] + } + ], + "source": [ + "def get_qual_check(\n", + " df: pl.DataFrame, # data frame with raw values\n", + " metadata: dict, # dictionary with metadata\n", + " 
csv_name: str = 'out.csv', # name of the output csv file, need to end with .csv\n", + " pupil_col: str = \"pupil\", # column in df where pupil data are stored\n", + " data_col: str = 'screen', # column in df where the screen activity is stored\n", + " gaze_x_col: str = 'pixel_x', # column in df with the gaze X coordinates\n", + " trial_col: str = 'trial'): # column in df with the trial runs\n", + "\n", + " measures_dict = {}\n", + " # check metadata values\n", + " measures_dict['sampling_rate'] = metadata['sampling_rate']\n", + " measures_dict['data_loss_ratio'] = metadata['data_loss_ratio']\n", + " measures_dict['data_loss_ratio_blinks'] = metadata['data_loss_ratio_blinks']\n", + " measures_dict['total_recording_duration_sec'] = metadata['total_recording_duration_ms'] / 1000\n", + " \n", + " # Check amount of pupil omissions\n", + " measures_dict['missing_pupil_ratio'], measures_dict['missing_pupil_sec'] = missing_pupil(df, measures_dict['sampling_rate'], pupil_col)\n", + "\n", + " # Check amount of missing gaze data\n", + " measures_dict['missing_gaze_ratio'], measures_dict['missing_gaze_sec'] = missing_gaze(df, measures_dict['sampling_rate'], gaze_x_col)\n", + "\n", + " # Check the amount of time spent not on experimental tasks\n", + "\n", + " measures_dict['off_task_time_sec'], measures_dict['off_task_time_ratio'] = off_task_time(df, metadata['sampling_rate'], data_col)\n", + "\n", + " # Check the average quality of validation\n", + " measures_dict['average_validation_score'], measures_dict['global_max_validation_score'] = get_validation_data(metadata['validations'])\n", + " \n", + " #Check time loss\n", + " measures_dict['time_loss_sec'] = time_loss(df)[0]\n", + " measures_dict['time_loss_ratio'] = time_loss(df)[1]\n", + " \n", + " # Divide data frame by trials\n", + " list_of_trials_raw = data.frame.partition_by(by=trial_col)\n", + " list_of_trials = [i for i in list_of_trials_raw if i.item(1,trial_col) is not None]\n", + " i=0\n", + " \n", + " # Check the 
quality measures for separate trials\n", + " for trial in list_of_trials:\n", + " null_ratio_expr = pm.measure.measures.null_ratio(\"pixel_x\", pl.Float64)\n", + " null_ratio = trial.select([null_ratio_expr]).item()\n", + " trial_name = str(trial.item(1,trial_col))\n", + " measures_dict[trial_name + '_null_ratio'] = null_ratio\n", + " measures_dict[trial_name + '_average_validation'] = metadata['validations'][i][\"validation_score_avg\"]\n", + " measures_dict[trial_name + '_max_validation'] = metadata['validations'][i][\"validation_score_max\"]\n", + " measures_dict[trial_name + '_error'] = metadata['validations'][i][\"error\"]\n", + " measures_dict[trial_name + '_tracked_eye'] = metadata['validations'][i][\"tracked_eye\"]\n", + " i+=1\n", + " \n", + " # Save measures in csv\n", + " with open(csv_name, \"w\", newline=\"\") as f:\n", + " w = csv.DictWriter(f, measures_dict.keys())\n", + " w.writeheader()\n", + " w.writerow(measures_dict)\n", + " print(measures_dict)\n", + "\n", + "get_qual_check(data.frame, metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1a67a5f-3f5e-4473-8e19-670b2784ed9f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c97690648d046e8dc7473237f84a7d6cca1914dc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:58:24 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tutorials/eyetracking_measures.ipynb | 247 
++++++------------ 1 file changed, 78 insertions(+), 169 deletions(-) diff --git a/docs/source/tutorials/eyetracking_measures.ipynb b/docs/source/tutorials/eyetracking_measures.ipynb index 212704d71..3d7de5d69 100644 --- a/docs/source/tutorials/eyetracking_measures.ipynb +++ b/docs/source/tutorials/eyetracking_measures.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "e20f3277-92cf-43bd-bd9f-8e73d20e98bb", + "id": "0", "metadata": {}, "source": [ "# Quality measures tutorial\n", @@ -13,58 +13,33 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "9da5b864-1f8c-404d-b198-752b9dcbcbd4", + "execution_count": null, + "id": "1", "metadata": {}, "outputs": [], "source": [ + "import csv\n", "from pathlib import Path\n", - "import pymovements as pm\n", + "\n", "import polars as pl\n", - "import csv" + "\n", + "import pymovements as pm" ] }, { "cell_type": "markdown", - "id": "90641553-c477-46d6-bb2b-22eeeaef6eb9", - "metadata": { - "tags": [] - }, + "id": "2", + "metadata": {}, "source": [ "After importing some basic libraries let's load an example eyetracking file." 
] }, { "cell_type": "code", - "execution_count": 8, - "id": "133591f7-a070-4cba-8d23-3f8183e22a1a", + "execution_count": null, + "id": "3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape: (3_220_492, 7)\n", - "┌────────────┬───────┬──────────┬────────────┬──────────┬──────────┬─────────────────┐\n", - "│ time ┆ pupil ┆ practice ┆ screen ┆ trial ┆ activity ┆ pixel │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ bool ┆ str ┆ str ┆ str ┆ list[f64] │\n", - "╞════════════╪═══════╪══════════╪════════════╪══════════╪══════════╪═════════════════╡\n", - "│ 709376.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ [102.2, 101.4] │\n", - "│ 709376.5 ┆ 210.0 ┆ null ┆ null ┆ null ┆ null ┆ [104.1, 99.8] │\n", - "│ 709377.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ [99.1, 106.5] │\n", - "│ 709377.5 ┆ 204.0 ┆ null ┆ null ┆ null ┆ null ┆ [85.2, 103.3] │\n", - "│ 709378.0 ┆ 202.0 ┆ null ┆ null ┆ null ┆ null ┆ [98.2, 106.7] │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 4.014271e6 ┆ 345.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1192.4, 854.6] │\n", - "│ 4014271.5 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1193.9, 853.4] │\n", - "│ 4.014272e6 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1189.7, 856.1] │\n", - "│ 4014272.5 ┆ 337.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1197.2, 850.1] │\n", - "│ 4.014273e6 ┆ 343.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ [1192.9, 848.7] │\n", - "└────────────┴───────┴──────────┴────────────┴──────────┴──────────┴─────────────────┘\n" - ] - } - ], + "outputs": [], "source": [ "asc = \"ch1hr007.asc\"\n", "\n", @@ -110,7 +85,7 @@ }, { "cell_type": "markdown", - "id": "7e3b6a23-efa2-497d-b6c1-14f8808ed914", + "id": "4", "metadata": {}, "source": [ "We will split the pixel column into two for x and y coordinates of the gaze." 
@@ -118,48 +93,10 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "97c48a35-87a8-479a-80d1-8fa9ba3668a3", + "execution_count": null, + "id": "5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3_220_492, 8)
timepupilpracticescreentrialactivitypixel_xpixel_y
f64f64boolstrstrstrf64f64
709376.0206.0nullnullnullnull102.2101.4
709376.5210.0nullnullnullnull104.199.8
709377.0206.0nullnullnullnull99.1106.5
709377.5204.0nullnullnullnull85.2103.3
709378.0202.0nullnullnullnull98.2106.7
4.014271e6345.0false"question_6""trial_10""question"1192.4854.6
4014271.5342.0false"question_6""trial_10""question"1193.9853.4
4.014272e6342.0false"question_6""trial_10""question"1189.7856.1
4014272.5337.0false"question_6""trial_10""question"1197.2850.1
4.014273e6343.0false"question_6""trial_10""question"1192.9848.7
" - ], - "text/plain": [ - "shape: (3_220_492, 8)\n", - "┌────────────┬───────┬──────────┬────────────┬──────────┬──────────┬─────────┬─────────┐\n", - "│ time ┆ pupil ┆ practice ┆ screen ┆ trial ┆ activity ┆ pixel_x ┆ pixel_y │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", - "│ f64 ┆ f64 ┆ bool ┆ str ┆ str ┆ str ┆ f64 ┆ f64 │\n", - "╞════════════╪═══════╪══════════╪════════════╪══════════╪══════════╪═════════╪═════════╡\n", - "│ 709376.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ 102.2 ┆ 101.4 │\n", - "│ 709376.5 ┆ 210.0 ┆ null ┆ null ┆ null ┆ null ┆ 104.1 ┆ 99.8 │\n", - "│ 709377.0 ┆ 206.0 ┆ null ┆ null ┆ null ┆ null ┆ 99.1 ┆ 106.5 │\n", - "│ 709377.5 ┆ 204.0 ┆ null ┆ null ┆ null ┆ null ┆ 85.2 ┆ 103.3 │\n", - "│ 709378.0 ┆ 202.0 ┆ null ┆ null ┆ null ┆ null ┆ 98.2 ┆ 106.7 │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 4.014271e6 ┆ 345.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1192.4 ┆ 854.6 │\n", - "│ 4014271.5 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1193.9 ┆ 853.4 │\n", - "│ 4.014272e6 ┆ 342.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1189.7 ┆ 856.1 │\n", - "│ 4014272.5 ┆ 337.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1197.2 ┆ 850.1 │\n", - "│ 4.014273e6 ┆ 343.0 ┆ false ┆ question_6 ┆ trial_10 ┆ question ┆ 1192.9 ┆ 848.7 │\n", - "└────────────┴───────┴──────────┴────────────┴──────────┴──────────┴─────────┴─────────┘" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Unnest the 'pixel' column\n", "\n", @@ -175,7 +112,7 @@ }, { "cell_type": "markdown", - "id": "29703848-c884-4ef0-9049-46b5a08d7080", + "id": "6", "metadata": {}, "source": [ "# Extracting quality measures" @@ -183,7 +120,7 @@ }, { "cell_type": "markdown", - "id": "6fe936fb-b2a4-4d98-89b7-b12ec215c150", + "id": "7", "metadata": {}, "source": [ "The following function is meant to check if the sampling rate of the eyetracker ever deviated from the expected value.\n", @@ -193,25 +130,16 @@ 
}, { "cell_type": "code", - "execution_count": 10, - "id": "0374bd91-0424-490f-881f-83b0fa292caa", + "execution_count": null, + "id": "8", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0, 0.0)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "## function calculating for skipped time\n", + "# function calculating for skipped time\n", + "\n", "\n", - "def time_loss(df: pl.DataFrame, task_column: str = 'activity', activity_id: str = 'page', target_column: str = 'time', expected_diff: float = 0.5, tolerance: float = 1e-7):\n", + "def time_loss(df: pl.DataFrame, task_column: str = 'activity', activity_id: str = 'page',\n", + " target_column: str = 'time', expected_diff: float = 0.5, tolerance: float = 1e-7):\n", " # Filter the DataFrame for rows where the 'activity' column contains the word 'page'\n", " filtered_df = df.filter(pl.col(task_column).str.contains(activity_id))\n", " # Calculate the difference between consecutive rows\n", @@ -221,45 +149,46 @@ " # total skipped time\n", " skipped_time_absolute = sum(large_differences)\n", " # ratio of skipped time to experiment duration\n", - " total_duration = (df[target_column][len(df)-1] - df[target_column][0])\n", + " total_duration = (df[target_column][len(df) - 1] - df[target_column][0])\n", " skipped_time_ratio = (skipped_time_absolute / total_duration)\n", " return skipped_time_absolute, skipped_time_ratio\n", - " \n", + "\n", + "\n", "time_loss(data.frame)" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "88bbc1a7-e7af-45f2-9c9c-f447f4c11531", + "execution_count": null, + "id": "9", "metadata": {}, "outputs": [], "source": [ "def missing_pupil(df, sampling_rate, pupil_col):\n", - " miss_pupil_tuple = df[pupil_col].value_counts().row(by_predicate=(pl.col(pupil_col)==0.0))\n", + " miss_pupil_tuple = df[pupil_col].value_counts().row(by_predicate=(pl.col(pupil_col) == 0.0))\n", " abs_miss_pupil = 
miss_pupil_tuple[1] / sampling_rate\n", - " per_miss_pupil = miss_pupil_tuple[1]/(df.height)\n", + " per_miss_pupil = miss_pupil_tuple[1] / (df.height)\n", " return per_miss_pupil, abs_miss_pupil" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "2bd80359-95d4-4b8c-92c8-47835ae94aa4", + "execution_count": null, + "id": "10", "metadata": {}, "outputs": [], "source": [ "def missing_gaze(df, sampling_rate, gaze_x_col):\n", " abs_miss_gaze_x = data.frame.select(pl.col(gaze_x_col).is_null().sum()).item()\n", - " per_miss_gaze_x = abs_miss_gaze_x /(df.height)\n", + " per_miss_gaze_x = abs_miss_gaze_x / (df.height)\n", " abs_miss_gaze_x / sampling_rate\n", " return per_miss_gaze_x, abs_miss_gaze_x" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "2b78a3d2-d40f-4b85-9f5d-2a3dce9265b2", + "execution_count": null, + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -267,12 +196,12 @@ " null_values_tab = df.null_count()\n", " abs_miss_screen = null_values_tab[data_col][0]\n", " per_miss_screen = abs_miss_screen / (df.height)\n", - " return abs_miss_screen/1000, per_miss_screen" + " return abs_miss_screen / 1000, per_miss_screen" ] }, { "cell_type": "markdown", - "id": "3b60c1f7-8396-45ab-9224-92e3019c16b0", + "id": "12", "metadata": {}, "source": [ "The following function gets the information about validation, specifically average value of all of the validations and maximal value of all of the validations. It is called in the next get_qual_check function." 
@@ -280,8 +209,8 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "c867d088-14cd-404e-bbc9-2174e46d45c1", + "execution_count": null, + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -293,13 +222,12 @@ " max_values.append(float(validation['validation_score_max']))\n", " average_average = sum_average / len(validations)\n", " global_max = max(max_values)\n", - " return average_average, global_max \n", - " " + " return average_average, global_max" ] }, { "cell_type": "markdown", - "id": "8d9325c0-63ea-4249-a70e-5bd4a97a4ec5", + "id": "14", "metadata": {}, "source": [ "The following function extracts certain signifficant quality measures from the metadata and makes use of the functions above to calculate its own measures. The result is saved as a csv file." @@ -307,27 +235,19 @@ }, { "cell_type": "code", - "execution_count": 26, - "id": "fb241efc-5e08-4f3e-9bff-c389213633c0", + "execution_count": null, + "id": "15", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'sampling_rate': 2000.0, 'data_loss_ratio': 0.04547720970203655, 'data_loss_ratio_blinks': 0.044510408788512146, 'total_recording_duration_sec': 1610.466, 'missing_pupil_ratio': 0.0453467979426746, 'missing_pupil_sec': 73.0195, 'missing_gaze_ratio': 0.0453467979426746, 'missing_gaze_sec': 146039, 'off_task_time_sec': 1016.955, 'off_task_time_ratio': 0.3157762851141999, 'average_validation_score': 0.42266666666666663, 'global_max_validation_score': 1.89, 'time_loss_sec': 0, 'time_loss_ratio': 0.0, 'PRACTICE_trial_1_null_ratio': 0.0795467070128153, 'PRACTICE_trial_1_average_validation': '0.41', 'PRACTICE_trial_1_max_validation': '0.63', 'PRACTICE_trial_1_error': 'GOOD ERROR', 'PRACTICE_trial_1_tracked_eye': 'RIGHT', 'PRACTICE_trial_2_null_ratio': 0.07295313856754715, 'PRACTICE_trial_2_average_validation': '1.44', 'PRACTICE_trial_2_max_validation': '1.89', 'PRACTICE_trial_2_error': 'FAIR ERROR', 'PRACTICE_trial_2_tracked_eye': 'RIGHT', 
'trial_1_null_ratio': 0.0659771826394897, 'trial_1_average_validation': '0.46', 'trial_1_max_validation': '0.91', 'trial_1_error': 'GOOD ERROR', 'trial_1_tracked_eye': 'RIGHT', 'trial_2_null_ratio': 0.05499177626232743, 'trial_2_average_validation': '0.18', 'trial_2_max_validation': '0.43', 'trial_2_error': 'GOOD ERROR', 'trial_2_tracked_eye': 'RIGHT', 'trial_3_null_ratio': 0.050450814192191884, 'trial_3_average_validation': '0.26', 'trial_3_max_validation': '0.63', 'trial_3_error': 'GOOD ERROR', 'trial_3_tracked_eye': 'RIGHT', 'trial_4_null_ratio': 0.05847555828958095, 'trial_4_average_validation': '0.26', 'trial_4_max_validation': '0.36', 'trial_4_error': 'GOOD ERROR', 'trial_4_tracked_eye': 'RIGHT', 'trial_5_null_ratio': 0.10752111673790876, 'trial_5_average_validation': '0.32', 'trial_5_max_validation': '0.41', 'trial_5_error': 'GOOD ERROR', 'trial_5_tracked_eye': 'RIGHT', 'trial_6_null_ratio': 0.03940737476936629, 'trial_6_average_validation': '0.44', 'trial_6_max_validation': '0.57', 'trial_6_error': 'GOOD ERROR', 'trial_6_tracked_eye': 'RIGHT', 'trial_7_null_ratio': 0.052679978980556985, 'trial_7_average_validation': '0.27', 'trial_7_max_validation': '0.44', 'trial_7_error': 'GOOD ERROR', 'trial_7_tracked_eye': 'RIGHT', 'trial_8_null_ratio': 0.05433123214813351, 'trial_8_average_validation': '0.20', 'trial_8_max_validation': '0.49', 'trial_8_error': 'GOOD ERROR', 'trial_8_tracked_eye': 'RIGHT', 'trial_9_null_ratio': 0.06259078730753087, 'trial_9_average_validation': '0.36', 'trial_9_max_validation': '0.64', 'trial_9_error': 'GOOD ERROR', 'trial_9_tracked_eye': 'RIGHT', 'trial_10_null_ratio': 0.0697597766446284, 'trial_10_average_validation': '0.28', 'trial_10_max_validation': '0.36', 'trial_10_error': 'GOOD ERROR', 'trial_10_tracked_eye': 'RIGHT'}\n" - ] - } - ], + "outputs": [], "source": [ "def get_qual_check(\n", - " df: pl.DataFrame, # data frame with raw values\n", - " metadata: dict, # dictionary with metadata\n", - " csv_name: str = 'out.csv', # name 
of the output csv file, need to end with .csv\n", - " pupil_col: str = \"pupil\", # column in df where pupil data are stored\n", - " data_col: str = 'screen', # column in df where the screen activity is stored\n", - " gaze_x_col: str = 'pixel_x', # column in df with the gaze X coordinates\n", - " trial_col: str = 'trial'): # column in df with the trial runs\n", + " df: pl.DataFrame, # data frame with raw values\n", + " metadata: dict, # dictionary with metadata\n", + " csv_name: str = 'out.csv', # name of the output csv file, need to end with .csv\n", + " pupil_col: str = \"pupil\", # column in df where pupil data are stored\n", + " data_col: str = 'screen', # column in df where the screen activity is stored\n", + " gaze_x_col: str = 'pixel_x', # column in df with the gaze X coordinates\n", + " trial_col: str = 'trial'): # column in df with the trial runs\n", "\n", " measures_dict = {}\n", " # check metadata values\n", @@ -335,41 +255,47 @@ " measures_dict['data_loss_ratio'] = metadata['data_loss_ratio']\n", " measures_dict['data_loss_ratio_blinks'] = metadata['data_loss_ratio_blinks']\n", " measures_dict['total_recording_duration_sec'] = metadata['total_recording_duration_ms'] / 1000\n", - " \n", + "\n", " # Check amount of pupil omissions\n", - " measures_dict['missing_pupil_ratio'], measures_dict['missing_pupil_sec'] = missing_pupil(df, measures_dict['sampling_rate'], pupil_col)\n", + " measures_dict['missing_pupil_ratio'], measures_dict['missing_pupil_sec'] = missing_pupil(\n", + " df, measures_dict['sampling_rate'], pupil_col)\n", "\n", " # Check amount of missing gaze data\n", - " measures_dict['missing_gaze_ratio'], measures_dict['missing_gaze_sec'] = missing_gaze(df, measures_dict['sampling_rate'], gaze_x_col)\n", + " measures_dict['missing_gaze_ratio'], measures_dict['missing_gaze_sec'] = missing_gaze(\n", + " df, measures_dict['sampling_rate'], gaze_x_col)\n", "\n", " # Check the amount of time spent not on experimental tasks\n", "\n", - " 
measures_dict['off_task_time_sec'], measures_dict['off_task_time_ratio'] = off_task_time(df, metadata['sampling_rate'], data_col)\n", + " measures_dict['off_task_time_sec'], measures_dict['off_task_time_ratio'] = off_task_time(\n", + " df, metadata['sampling_rate'], data_col)\n", "\n", " # Check the average quality of validation\n", - " measures_dict['average_validation_score'], measures_dict['global_max_validation_score'] = get_validation_data(metadata['validations'])\n", - " \n", - " #Check time loss\n", + " measures_dict['average_validation_score'], measures_dict['global_max_validation_score'] = get_validation_data(\n", + " metadata['validations'])\n", + "\n", + " # Check time loss\n", " measures_dict['time_loss_sec'] = time_loss(df)[0]\n", " measures_dict['time_loss_ratio'] = time_loss(df)[1]\n", - " \n", + "\n", " # Divide data frame by trials\n", " list_of_trials_raw = data.frame.partition_by(by=trial_col)\n", - " list_of_trials = [i for i in list_of_trials_raw if i.item(1,trial_col) is not None]\n", - " i=0\n", - " \n", + " list_of_trials = [i for i in list_of_trials_raw if i.item(1, trial_col) is not None]\n", + " i = 0\n", + "\n", " # Check the quality measures for separate trials\n", " for trial in list_of_trials:\n", " null_ratio_expr = pm.measure.measures.null_ratio(\"pixel_x\", pl.Float64)\n", " null_ratio = trial.select([null_ratio_expr]).item()\n", - " trial_name = str(trial.item(1,trial_col))\n", + " trial_name = str(trial.item(1, trial_col))\n", " measures_dict[trial_name + '_null_ratio'] = null_ratio\n", - " measures_dict[trial_name + '_average_validation'] = metadata['validations'][i][\"validation_score_avg\"]\n", - " measures_dict[trial_name + '_max_validation'] = metadata['validations'][i][\"validation_score_max\"]\n", + " measures_dict[trial_name +\n", + " '_average_validation'] = metadata['validations'][i][\"validation_score_avg\"]\n", + " measures_dict[trial_name +\n", + " '_max_validation'] = 
metadata['validations'][i][\"validation_score_max\"]\n", " measures_dict[trial_name + '_error'] = metadata['validations'][i][\"error\"]\n", " measures_dict[trial_name + '_tracked_eye'] = metadata['validations'][i][\"tracked_eye\"]\n", - " i+=1\n", - " \n", + " i += 1\n", + "\n", " # Save measures in csv\n", " with open(csv_name, \"w\", newline=\"\") as f:\n", " w = csv.DictWriter(f, measures_dict.keys())\n", @@ -377,37 +303,20 @@ " w.writerow(measures_dict)\n", " print(measures_dict)\n", "\n", + "\n", "get_qual_check(data.frame, metadata)" ] }, { "cell_type": "code", "execution_count": null, - "id": "b1a67a5f-3f5e-4473-8e19-670b2784ed9f", + "id": "16", "metadata": {}, "outputs": [], "source": [] } ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 5 }