Skip to content

Commit

Permalink
feat: Add kwargs for explicit column passing in gaze.from_numpy() (#513)
Browse files Browse the repository at this point in the history
  • Loading branch information
dkrako authored Sep 12, 2023
1 parent 380df1e commit 045f6a2
Show file tree
Hide file tree
Showing 2 changed files with 273 additions and 10 deletions.
178 changes: 175 additions & 3 deletions src/pymovements/gaze/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,17 @@

from pymovements.gaze.experiment import Experiment
from pymovements.gaze.gaze_dataframe import GazeDataFrame
from pymovements.utils import checks


def from_numpy(
data: np.ndarray,
schema: list[str],
data: np.ndarray | None = None,
time: np.ndarray | None = None,
pixel: np.ndarray | None = None,
position: np.ndarray | None = None,
velocity: np.ndarray | None = None,
acceleration: np.ndarray | None = None,
schema: list[str] | None = None,
experiment: Experiment | None = None,
orient: Literal['col', 'row'] = 'col',
time_column: str | None = None,
Expand All @@ -43,10 +49,30 @@ def from_numpy(
) -> GazeDataFrame:
"""Get a :py:class:`~pymovements.gaze.gaze_dataframe.GazeDataFrame` from a numpy array.
There are two mutually exclusive ways of conversion.
**Single data array**: Pass a single numpy array via `data` and specify its schema and
orientation. You can then additionally pass column specifiers, e.g. `time_column` and
`position_columns`.
**Column specific arrays**: For each type of signal, you can pass the numpy array explicitly,
e.g. `position` or `velocity`. You must not pass `data` or any column list specifiers using this
method.
Parameters
----------
data:
Two-dimensional data represented as a numpy ndarray.
time:
Array of timestamps.
pixel:
Array of gaze pixel positions.
position:
Array of gaze positions in degrees of visual angle.
velocity:
Array of gaze velocities in degrees of visual angle per second.
acceleration:
Array of gaze accelerations in degrees of visual angle per square second.
schema:
A list of column names.
orient:
Expand All @@ -68,8 +94,154 @@ def from_numpy(
Returns
-------
py:class:`~pymovements.GazeDataFrame`
Examples
--------
Creating an example numpy array with 3 columns and 100 rows. We call this layout column
orientation.
>>> import numpy as np
>>> import pymovements as pm
>>>
>>> arr = np.zeros((3, 100))
>>> arr.shape
(3, 100)
Specifying the underlying schema:
>>> schema = ['t', 'x', 'y']
Pass the array as ``data`` to ``pm.gaze.from_numpy()``, by specifying schema and components.
>>> gaze = pm.gaze.from_numpy(
... arr,
... schema=schema,
... time_column='t',
... position_columns=['x', 'y'],
... orient='col',
... )
>>> gaze.frame
shape: (100, 2)
┌──────┬────────────┐
│ time ┆ position │
│ --- ┆ --- │
│ f64 ┆ list[f64] │
╞══════╪════════════╡
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ … ┆ … │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
└──────┴────────────┘
Use the ``orient`` keyword argument to specify the layout of your array.
>>> arr.T.shape
(100, 3)
>>> gaze = pm.gaze.from_numpy(
... arr.T,
... schema=schema,
... time_column='t',
... position_columns=['x', 'y'],
... orient='row',
... )
>>> gaze.frame
shape: (100, 2)
┌──────┬────────────┐
│ time ┆ position │
│ --- ┆ --- │
│ f64 ┆ list[f64] │
╞══════╪════════════╡
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ … ┆ … │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
└──────┴────────────┘
Pass the data explicitly via the specific keyword arguments, without having to specify a schema.
>>> gaze = pm.gaze.from_numpy(
... time=arr[0],
... position=arr[[1, 2]],
... orient='col',
... )
>>> gaze.frame
shape: (100, 2)
┌──────┬────────────┐
│ time ┆ position │
│ --- ┆ --- │
│ f64 ┆ list[f64] │
╞══════╪════════════╡
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ … ┆ … │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
│ 0.0 ┆ [0.0, 0.0] │
└──────┴────────────┘
"""
df = pl.from_numpy(data=data, schema=schema, orient=orient)
# Either data or {time, pixel, position, velocity, acceleration} must be None.
checks.check_is_mutual_exclusive(data=data, time=time)
checks.check_is_mutual_exclusive(data=data, pixel=pixel)
checks.check_is_mutual_exclusive(data=data, position=position)
checks.check_is_mutual_exclusive(data=data, velocity=velocity)
checks.check_is_mutual_exclusive(data=data, acceleration=acceleration)

if data is not None:
df = pl.from_numpy(data=data, schema=schema, orient=orient)
return GazeDataFrame(
data=df,
experiment=experiment,
time_column=time_column,
pixel_columns=pixel_columns,
position_columns=position_columns,
velocity_columns=velocity_columns,
acceleration_columns=acceleration_columns,
)

# Initialize with an empty DataFrame, as every column specifier could be None.
dfs: list[pl.DataFrame] = [pl.DataFrame()]

time_column = None
if time is not None:
df = pl.from_numpy(data=time, schema=['time'], orient=orient)
dfs.append(df)
time_column = 'time'

pixel_columns = None
if pixel is not None:
df = pl.from_numpy(data=pixel, orient=orient).select(pl.all().prefix('pixel_'))
dfs.append(df)
pixel_columns = df.columns

position_columns = None
if position is not None:
df = pl.from_numpy(data=position, orient=orient).select(pl.all().prefix('position_'))
dfs.append(df)
position_columns = df.columns

velocity_columns = None
if velocity is not None:
df = pl.from_numpy(data=velocity, orient=orient).select(pl.all().prefix('velocity_'))
dfs.append(df)
velocity_columns = df.columns

acceleration_columns = None
if acceleration is not None:
df = pl.from_numpy(data=acceleration, orient=orient)
df = df.select(pl.all().prefix('acceleration_'))
dfs.append(df)
acceleration_columns = df.columns

df = pl.concat(dfs, how='horizontal')
return GazeDataFrame(
data=df,
experiment=experiment,
Expand Down
105 changes: 98 additions & 7 deletions tests/gaze/integration_numpy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,23 @@ def test_from_numpy():
assert gaze.columns == schema


def test_from_pandas_explicit_columns():
def test_from_numpy_with_schema():
array = np.array(
[
[101, 102, 103, 104],
[0, 1, 2, 3],
[4, 5, 6, 7],
[9, 8, 7, 6],
[5, 4, 3, 2],
[1, 2, 3, 4],
[5, 6, 7, 8],
[2, 3, 4, 5],
[6, 7, 8, 9],
],
dtype=np.int64,
dtype=np.float64,
)

schema = ['x_pix', 'y_pix', 'x_pos', 'y_pos']
schema = ['t', 'x_pix', 'y_pix', 'x_pos', 'y_pos', 'x_vel', 'y_vel', 'x_acc', 'y_acc']

experiment = pm.Experiment(
screen_width_px=1280,
Expand All @@ -84,13 +89,99 @@ def test_from_pandas_explicit_columns():
data=array,
schema=schema,
experiment=experiment,
time_column='t',
pixel_columns=['x_pix', 'y_pix'],
position_columns=['x_pos', 'y_pos'],
velocity_columns=['x_vel', 'y_vel'],
acceleration_columns=['x_acc', 'y_acc'],
)

expected = pl.DataFrame(
{
'time': [101, 102, 103, 104],
'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]],
'position': [[9, 5], [8, 4], [7, 3], [6, 2]],
'velocity': [[1, 5], [2, 6], [3, 7], [4, 8]],
'acceleration': [[2, 6], [3, 7], [4, 8], [5, 9]],
},
schema={
'time': pl.Float64,
'pixel': pl.List(pl.Float64),
'position': pl.List(pl.Float64),
'velocity': pl.List(pl.Float64),
'acceleration': pl.List(pl.Float64),
},
)

assert_frame_equal(gaze.frame, expected)
assert gaze.n_components == 2


def test_from_numpy_explicit_columns():
    """Pass every signal as its own array and compare the resulting frame to the expected one."""
    # Each multi-component signal is given as two rows (components) of four samples.
    signals = {
        'time': np.array([101, 102, 103, 104], dtype=np.int64),
        'pixel': np.array([[0, 1, 2, 3], [4, 5, 6, 7]], dtype=np.int64),
        'position': np.array([[9, 8, 7, 6], [5, 4, 3, 2]], dtype=np.float64),
        'velocity': np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float64),
        'acceleration': np.array([[2, 3, 4, 5], [6, 7, 8, 9]], dtype=np.float64),
    }

    setup = pm.Experiment(
        screen_width_px=1280,
        screen_height_px=1024,
        screen_width_cm=38,
        screen_height_cm=30,
        distance_cm=68,
        origin='lower left',
        sampling_rate=1000.0,
    )

    gaze = pm.gaze.from_numpy(experiment=setup, **signals)

    # The expected frame holds one list per sample for each multi-component signal,
    # with the integer dtypes of ``time`` and ``pixel`` kept as passed in.
    expected_values = {
        'time': [101, 102, 103, 104],
        'pixel': [[0, 4], [1, 5], [2, 6], [3, 7]],
        'position': [[9, 5], [8, 4], [7, 3], [6, 2]],
        'velocity': [[1, 5], [2, 6], [3, 7], [4, 8]],
        'acceleration': [[2, 6], [3, 7], [4, 8], [5, 9]],
    }
    expected_dtypes = {
        'time': pl.Int64,
        'pixel': pl.List(pl.Int64),
        'position': pl.List(pl.Float64),
        'velocity': pl.List(pl.Float64),
        'acceleration': pl.List(pl.Float64),
    }
    expected = pl.DataFrame(expected_values, schema=expected_dtypes)

    assert_frame_equal(gaze.frame, expected)
    assert gaze.n_components == 2


def test_init_all_none():
    """Check that ``from_numpy`` with every argument set to ``None`` gives an empty frame."""
    gaze = pm.gaze.from_numpy(
        data=None,
        schema=None,
        experiment=None,
        time=None,
        pixel=None,
        position=None,
        velocity=None,
        acceleration=None,
        time_column=None,
        pixel_columns=None,
        position_columns=None,
        velocity_columns=None,
        acceleration_columns=None,
    )

    # No input was given, so the frame is expected to have no columns at all.
    expected = pl.DataFrame()

    assert_frame_equal(gaze.frame, expected)
    assert gaze.n_components is None

0 comments on commit 045f6a2

Please sign in to comment.