Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add kwargs for explicit data passing in gaze.from_numpy() #513

Merged
merged 12 commits into from
Sep 12, 2023
178 changes: 175 additions & 3 deletions src/pymovements/gaze/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,17 @@

from pymovements.gaze.experiment import Experiment
from pymovements.gaze.gaze_dataframe import GazeDataFrame
from pymovements.utils import checks


def from_numpy(
data: np.ndarray,
schema: list[str],
data: np.ndarray | None = None,
time: np.ndarray | None = None,
pixel: np.ndarray | None = None,
position: np.ndarray | None = None,
velocity: np.ndarray | None = None,
acceleration: np.ndarray | None = None,
schema: list[str] | None = None,
experiment: Experiment | None = None,
orient: Literal['col', 'row'] = 'col',
time_column: str | None = None,
Expand All @@ -43,10 +49,30 @@ def from_numpy(
) -> GazeDataFrame:
"""Get a :py:class:`~pymovements.gaze.gaze_dataframe.GazeDataFrame` from a numpy array.

There are two mutually exclusive ways of conversion.

**Single data array**: Pass a single numpy array via `data` and specify its schema and
orientation. You can then additionally pass column specifiers, e.g. `time_column` and
`position_columns`.

**Column specific arrays**: For each type of signal, you can pass the numpy array explicitly,
e.g. `position` or `velocity`. You must not pass `data` or any column list specifiers using this
method.

Parameters
----------
data:
Two-dimensional data represented as a numpy ndarray.
time:
Array of timestamps.
pixel:
Array of gaze pixel positions.
position:
Array of gaze positions in degrees of visual angle.
velocity:
Array of gaze velocities in degrees of visual angle per second.
acceleration:
Array of gaze accelerations in degrees of visual angle per square second.
schema:
A list of column names.
orient:
Expand All @@ -68,8 +94,154 @@ def from_numpy(
Returns
-------
:py:class:`~pymovements.GazeDataFrame`

Examples
--------
Creating an example numpy array with 3 columns and 100 rows. We call this layout column
orientation.
>>> import numpy as np
>>> import pymovements as pm
>>>
>>> arr = np.zeros((3, 100))
>>> arr.shape
(3, 100)

Specifying the underlying schema:
>>> schema = ['t', 'x', 'y']

Pass the array as ``data`` to ``pm.gaze.from_numpy()`` and specify the schema and components.
>>> gaze = pm.gaze.from_numpy(
... arr,
... schema=schema,
... time_column='t',
... position_columns=['x', 'y'],
... orient='col',
... )
>>> gaze.frame
shape: (100, 2)
┌──────┬────────────┐
│ time ┆ position │
│ --- ┆ --- │
│ f64 ┆ list[f64] │
╞══════╪════════════╡
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ … ┆ … │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
└──────┴────────────┘

Use the ``orient`` keyword argument to specify the layout of your array.
>>> arr.T.shape
(100, 3)

>>> gaze = pm.gaze.from_numpy(
... arr.T,
... schema=schema,
... time_column='t',
... position_columns=['x', 'y'],
... orient='row',
... )
>>> gaze.frame
shape: (100, 2)
┌──────┬────────────┐
│ time ┆ position │
│ --- ┆ --- │
│ f64 ┆ list[f64] │
╞══════╪════════════╡
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ … ┆ … │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
└──────┴────────────┘

Pass the data explicitly via the specific keyword arguments, without having to specify a schema.
>>> gaze = pm.gaze.from_numpy(
... time=arr[0],
... position=arr[[1, 2]],
... orient='col',
... )
>>> gaze.frame
shape: (100, 2)
┌──────┬────────────┐
│ time ┆ position │
│ --- ┆ --- │
│ f64 ┆ list[f64] │
╞══════╪════════════╡
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ … ┆ … │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
└──────┴────────────┘
"""
df = pl.from_numpy(data=data, schema=schema, orient=orient)
# Either data or {time, pixel, position, velocity, acceleration} must be None.
checks.check_is_mutual_exclusive(data=data, time=time)
checks.check_is_mutual_exclusive(data=data, pixel=pixel)
checks.check_is_mutual_exclusive(data=data, position=position)
checks.check_is_mutual_exclusive(data=data, velocity=velocity)
checks.check_is_mutual_exclusive(data=data, acceleration=acceleration)

if data is not None:
df = pl.from_numpy(data=data, schema=schema, orient=orient)
return GazeDataFrame(
data=df,
experiment=experiment,
time_column=time_column,
pixel_columns=pixel_columns,
position_columns=position_columns,
velocity_columns=velocity_columns,
acceleration_columns=acceleration_columns,
)

# Initialize with an empty DataFrame, as every column specifier could be None.
dfs: list[pl.DataFrame] = [pl.DataFrame()]

time_column = None
if time is not None:
df = pl.from_numpy(data=time, schema=['time'], orient=orient)
dfs.append(df)
time_column = 'time'

pixel_columns = None
if pixel is not None:
df = pl.from_numpy(data=pixel, orient=orient).select(pl.all().prefix('pixel_'))
dfs.append(df)
pixel_columns = df.columns

position_columns = None
if position is not None:
df = pl.from_numpy(data=position, orient=orient).select(pl.all().prefix('position_'))
dfs.append(df)
position_columns = df.columns

velocity_columns = None
if velocity is not None:
df = pl.from_numpy(data=velocity, orient=orient).select(pl.all().prefix('velocity_'))
dfs.append(df)
velocity_columns = df.columns

acceleration_columns = None
if acceleration is not None:
df = pl.from_numpy(data=acceleration, orient=orient)
df = df.select(pl.all().prefix('acceleration_'))
dfs.append(df)
acceleration_columns = df.columns

df = pl.concat(dfs, how='horizontal')
return GazeDataFrame(
data=df,
experiment=experiment,
Expand Down
105 changes: 98 additions & 7 deletions tests/gaze/integration_numpy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,23 @@ def test_from_numpy():
assert gaze.columns == schema


def test_from_pandas_explicit_columns():
def test_from_numpy_with_schema():
array = np.array(
[
[101, 102, 103, 104],
[0, 1, 2, 3],
[4, 5, 6, 7],
[9, 8, 7, 6],
[5, 4, 3, 2],
[1, 2, 3, 4],
[5, 6, 7, 8],
[2, 3, 4, 5],
[6, 7, 8, 9],
],
dtype=np.int64,
dtype=np.float64,
)

schema = ['x_pix', 'y_pix', 'x_pos', 'y_pos']
schema = ['t', 'x_pix', 'y_pix', 'x_pos', 'y_pos', 'x_vel', 'y_vel', 'x_acc', 'y_acc']

experiment = pm.Experiment(
screen_width_px=1280,
Expand All @@ -84,13 +89,99 @@ def test_from_pandas_explicit_columns():
data=array,
schema=schema,
experiment=experiment,
time_column='t',
pixel_columns=['x_pix', 'y_pix'],
position_columns=['x_pos', 'y_pos'],
velocity_columns=['x_vel', 'y_vel'],
acceleration_columns=['x_acc', 'y_acc'],
)

expected = pl.DataFrame(
{
'time': [101, 102, 103, 104],
'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]],
'position': [[9, 5], [8, 4], [7, 3], [6, 2]],
'velocity': [[1, 5], [2, 6], [3, 7], [4, 8]],
'acceleration': [[2, 6], [3, 7], [4, 8], [5, 9]],
},
schema={
'time': pl.Float64,
'pixel': pl.List(pl.Float64),
'position': pl.List(pl.Float64),
'velocity': pl.List(pl.Float64),
'acceleration': pl.List(pl.Float64),
},
)

assert_frame_equal(gaze.frame, expected)
assert gaze.n_components == 2


def test_from_numpy_explicit_columns():
    """Check ``pm.gaze.from_numpy()`` when every signal is passed as its own array.

    No ``data`` array or schema is given; the expected frame has one ``time``
    column and one two-element list column per signal, keeping each input dtype.
    """
    # Two-component signals are given as (2, 4) arrays: one row per component.
    signal_arrays = {
        'time': np.array([101, 102, 103, 104], dtype=np.int64),
        'pixel': np.array([[0, 1, 2, 3], [4, 5, 6, 7]], dtype=np.int64),
        'position': np.array([[9, 8, 7, 6], [5, 4, 3, 2]], dtype=np.float64),
        'velocity': np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float64),
        'acceleration': np.array([[2, 3, 4, 5], [6, 7, 8, 9]], dtype=np.float64),
    }

    setup = pm.Experiment(
        screen_width_px=1280,
        screen_height_px=1024,
        screen_width_cm=38,
        screen_height_cm=30,
        distance_cm=68,
        origin='lower left',
        sampling_rate=1000.0,
    )

    result = pm.gaze.from_numpy(experiment=setup, **signal_arrays)

    # Integer inputs stay integer, float inputs stay float.
    expected_dtypes = {
        'time': pl.Int64,
        'pixel': pl.List(pl.Int64),
        'position': pl.List(pl.Float64),
        'velocity': pl.List(pl.Float64),
        'acceleration': pl.List(pl.Float64),
    }
    # Each row pairs the n-th sample of both components of a signal.
    expected_values = {
        'time': [101, 102, 103, 104],
        'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]],
        'position': [[9, 5], [8, 4], [7, 3], [6, 2]],
        'velocity': [[1, 5], [2, 6], [3, 7], [4, 8]],
        'acceleration': [[2, 6], [3, 7], [4, 8], [5, 9]],
    }
    expected_frame = pl.DataFrame(expected_values, schema=expected_dtypes)

    assert_frame_equal(result.frame, expected_frame)
    assert result.n_components == 2


def test_init_all_none():
gaze = pm.gaze.from_numpy(
data=None,
schema=None,
experiment=None,
time=None,
pixel=None,
position=None,
velocity=None,
acceleration=None,
time_column=None,
pixel_columns=None,
position_columns=None,
velocity_columns=None,
acceleration_columns=None,
)

expected = pl.DataFrame({
'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]],
'position': [[9, 5], [8, 4], [7, 3], [6, 2]],
})
expected = pl.DataFrame()

assert_frame_equal(gaze.frame, expected)
assert gaze.n_components is None