From 045f6a234692aa520ddf352796ab7023d4b3b0ed Mon Sep 17 00:00:00 2001 From: "Daniel G. Krakowczyk" Date: Tue, 12 Sep 2023 13:27:55 +0200 Subject: [PATCH] feat: Add kwargs for explicit column passing in gaze.from_numpy() (#513) --- src/pymovements/gaze/integration.py | 178 ++++++++++++++++++++++++++- tests/gaze/integration_numpy_test.py | 105 ++++++++++++++-- 2 files changed, 273 insertions(+), 10 deletions(-) diff --git a/src/pymovements/gaze/integration.py b/src/pymovements/gaze/integration.py index 66cc17cda..ec7b5f70a 100644 --- a/src/pymovements/gaze/integration.py +++ b/src/pymovements/gaze/integration.py @@ -28,11 +28,17 @@ from pymovements.gaze.experiment import Experiment from pymovements.gaze.gaze_dataframe import GazeDataFrame +from pymovements.utils import checks def from_numpy( - data: np.ndarray, - schema: list[str], + data: np.ndarray | None = None, + time: np.ndarray | None = None, + pixel: np.ndarray | None = None, + position: np.ndarray | None = None, + velocity: np.ndarray | None = None, + acceleration: np.ndarray | None = None, + schema: list[str] | None = None, experiment: Experiment | None = None, orient: Literal['col', 'row'] = 'col', time_column: str | None = None, @@ -43,10 +49,30 @@ def from_numpy( ) -> GazeDataFrame: """Get a :py:class:`~pymovements.gaze.gaze_dataframe.GazeDataFrame` from a numpy array. + There are two mutually exclusive ways of conversion. + + **Single data array**: Pass a single numpy array via `data` and specify its schema and + orientation. You can then additionally pass column specifiers, e.g. `time_column` and + `position_columns`. + + **Column specific arrays**: For each type of signal, you can pass the numpy array explicitly, + e.g. `position` or `velocity`. You must not pass `data` or any column list specifiers using this + method. + Parameters ---------- data: Two-dimensional data represented as a numpy ndarray. + time: + Array of timestamps. + pixel: + Array of gaze pixel positions. 
+ position: + Array of gaze positions in degrees of visual angle. + velocity: + Array of gaze velocities in degrees of visual angle per second. + acceleration: + Array of gaze accelerations in degrees of visual angle per square second. schema: A list of column names. orient: @@ -68,8 +94,154 @@ def from_numpy( Returns ------- py:class:`~pymovements.GazeDataFrame` + + Examples + -------- + Creating an example numpy array with 3 columns and 100 rows. We call this layout column + orientation. + >>> import numpy as np + >>> import pymovements as pm + >>> + >>> arr = np.zeros((3, 100)) + >>> arr.shape + (3, 100) + + Specifying the underlying schema: + >>> schema = ['t', 'x', 'y'] + + Pass the array as ``data`` to ``pm.gaze.from_numpy()``, by specifying schema and components. + >>> gaze = pm.gaze.from_numpy( + ... arr, + ... schema=schema, + ... time_column='t', + ... position_columns=['x', 'y'], + ... orient='col', + ... ) + >>> gaze.frame + shape: (100, 2) + ┌──────┬────────────┐ + │ time ┆ position │ + │ --- ┆ --- │ + │ f64 ┆ list[f64] │ + ╞══════╪════════════╡ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ … ┆ … │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + └──────┴────────────┘ + + Use the ``orient`` keyword argument to specify the layout of your array. + >>> arr.T.shape + (100, 3) + + >>> gaze = pm.gaze.from_numpy( + ... arr.T, + ... schema=schema, + ... time_column='t', + ... position_columns=['x', 'y'], + ... orient='row', + ... 
) + >>> gaze.frame + shape: (100, 2) + ┌──────┬────────────┐ + │ time ┆ position │ + │ --- ┆ --- │ + │ f64 ┆ list[f64] │ + ╞══════╪════════════╡ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ … ┆ … │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + └──────┴────────────┘ + + Pass the data explicitly via the specific keyword arguments, without having to specify a schema. + >>> gaze = pm.gaze.from_numpy( + ... time=arr[0], + ... position=arr[[1, 2]], + ... orient='col', + ... ) + >>> gaze.frame + shape: (100, 2) + ┌──────┬────────────┐ + │ time ┆ position │ + │ --- ┆ --- │ + │ f64 ┆ list[f64] │ + ╞══════╪════════════╡ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ … ┆ … │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + │ 0.0 ┆ [0.0, 0.0] │ + └──────┴────────────┘ """ - df = pl.from_numpy(data=data, schema=schema, orient=orient) + # Either data or {time, pixel, position, velocity, acceleration} must be None. + checks.check_is_mutual_exclusive(data=data, time=time) + checks.check_is_mutual_exclusive(data=data, pixel=pixel) + checks.check_is_mutual_exclusive(data=data, position=position) + checks.check_is_mutual_exclusive(data=data, velocity=velocity) + checks.check_is_mutual_exclusive(data=data, acceleration=acceleration) + + if data is not None: + df = pl.from_numpy(data=data, schema=schema, orient=orient) + return GazeDataFrame( + data=df, + experiment=experiment, + time_column=time_column, + pixel_columns=pixel_columns, + position_columns=position_columns, + velocity_columns=velocity_columns, + acceleration_columns=acceleration_columns, + ) + + # Initialize with an empty DataFrame, as every column specifier could be None. 
+ dfs: list[pl.DataFrame] = [pl.DataFrame()] + + time_column = None + if time is not None: + df = pl.from_numpy(data=time, schema=['time'], orient=orient) + dfs.append(df) + time_column = 'time' + + pixel_columns = None + if pixel is not None: + df = pl.from_numpy(data=pixel, orient=orient).select(pl.all().prefix('pixel_')) + dfs.append(df) + pixel_columns = df.columns + + position_columns = None + if position is not None: + df = pl.from_numpy(data=position, orient=orient).select(pl.all().prefix('position_')) + dfs.append(df) + position_columns = df.columns + + velocity_columns = None + if velocity is not None: + df = pl.from_numpy(data=velocity, orient=orient).select(pl.all().prefix('velocity_')) + dfs.append(df) + velocity_columns = df.columns + + acceleration_columns = None + if acceleration is not None: + df = pl.from_numpy(data=acceleration, orient=orient) + df = df.select(pl.all().prefix('acceleration_')) + dfs.append(df) + acceleration_columns = df.columns + + df = pl.concat(dfs, how='horizontal') return GazeDataFrame( data=df, experiment=experiment, diff --git a/tests/gaze/integration_numpy_test.py b/tests/gaze/integration_numpy_test.py index 3e5b2c3da..ae39855d2 100644 --- a/tests/gaze/integration_numpy_test.py +++ b/tests/gaze/integration_numpy_test.py @@ -57,18 +57,23 @@ def test_from_numpy(): assert gaze.columns == schema -def test_from_pandas_explicit_columns(): +def test_from_numpy_with_schema(): array = np.array( [ + [101, 102, 103, 104], [0, 1, 2, 3], [4, 5, 6, 7], [9, 8, 7, 6], [5, 4, 3, 2], + [1, 2, 3, 4], + [5, 6, 7, 8], + [2, 3, 4, 5], + [6, 7, 8, 9], ], - dtype=np.int64, + dtype=np.float64, ) - schema = ['x_pix', 'y_pix', 'x_pos', 'y_pos'] + schema = ['t', 'x_pix', 'y_pix', 'x_pos', 'y_pos', 'x_vel', 'y_vel', 'x_acc', 'y_acc'] experiment = pm.Experiment( screen_width_px=1280, @@ -84,13 +89,99 @@ def test_from_pandas_explicit_columns(): data=array, schema=schema, experiment=experiment, + time_column='t', pixel_columns=['x_pix', 'y_pix'], 
position_columns=['x_pos', 'y_pos'], + velocity_columns=['x_vel', 'y_vel'], + acceleration_columns=['x_acc', 'y_acc'], + ) + + expected = pl.DataFrame( + { + 'time': [101, 102, 103, 104], + 'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]], + 'position': [[9, 5], [8, 4], [7, 3], [6, 2]], + 'velocity': [[1, 5], [2, 6], [3, 7], [4, 8]], + 'acceleration': [[2, 6], [3, 7], [4, 8], [5, 9]], + }, + schema={ + 'time': pl.Float64, + 'pixel': pl.List(pl.Float64), + 'position': pl.List(pl.Float64), + 'velocity': pl.List(pl.Float64), + 'acceleration': pl.List(pl.Float64), + }, + ) + + assert_frame_equal(gaze.frame, expected) + assert gaze.n_components == 2 + + +def test_from_numpy_explicit_columns(): + time = np.array([101, 102, 103, 104], dtype=np.int64) + pixel = np.array([[0, 1, 2, 3], [4, 5, 6, 7]], dtype=np.int64) + position = np.array([[9, 8, 7, 6], [5, 4, 3, 2]], dtype=np.float64) + velocity = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float64) + acceleration = np.array([[2, 3, 4, 5], [6, 7, 8, 9]], dtype=np.float64) + + experiment = pm.Experiment( + screen_width_px=1280, + screen_height_px=1024, + screen_width_cm=38, + screen_height_cm=30, + distance_cm=68, + origin='lower left', + sampling_rate=1000.0, + ) + + gaze = pm.gaze.from_numpy( + time=time, + pixel=pixel, + position=position, + velocity=velocity, + acceleration=acceleration, + experiment=experiment, + ) + + expected = pl.DataFrame( + { + 'time': [101, 102, 103, 104], + 'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]], + 'position': [[9, 5], [8, 4], [7, 3], [6, 2]], + 'velocity': [[1, 5], [2, 6], [3, 7], [4, 8]], + 'acceleration': [[2, 6], [3, 7], [4, 8], [5, 9]], + }, + schema={ + 'time': pl.Int64, + 'pixel': pl.List(pl.Int64), + 'position': pl.List(pl.Float64), + 'velocity': pl.List(pl.Float64), + 'acceleration': pl.List(pl.Float64), + }, + ) + + assert_frame_equal(gaze.frame, expected) + assert gaze.n_components == 2 + + +def test_init_all_none(): + gaze = pm.gaze.from_numpy( + data=None, + schema=None, + 
experiment=None, + time=None, + pixel=None, + position=None, + velocity=None, + acceleration=None, + time_column=None, + pixel_columns=None, + position_columns=None, + velocity_columns=None, + acceleration_columns=None, ) - expected = pl.DataFrame({ - 'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]], - 'position': [[9, 5], [8, 4], [7, 3], [6, 2]], - }) + expected = pl.DataFrame() assert_frame_equal(gaze.frame, expected) + assert gaze.n_components is None