
feat: split TextStimulus by column values #879

Merged · 21 commits · Jan 8, 2025
Commits
384a417
feat: splitting aois criteria
izaskr Oct 24, 2024
ecea48f
feat: splitting aois criteria, fix pydocstyle
izaskr Oct 24, 2024
39a5f00
feat: splitting aois criteria, fix pydocstyle
izaskr Oct 24, 2024
b94a301
Merge branch 'main' of https://github.com/aeye-lab/pymovements into a…
izaskr Oct 24, 2024
6406dd2
Merge remote-tracking branch 'origin/aoi_events' into aoi_events
izaskr Oct 24, 2024
b65185d
Merge remote-tracking branch 'origin/aoi_events' into aoi_events
izaskr Oct 24, 2024
678a184
Merge branch 'main' of https://github.com/aeye-lab/pymovements into a…
izaskr Oct 24, 2024
2a822b6
Merge remote-tracking branch 'origin/aoi_events' into aoi_events
izaskr Oct 24, 2024
c51aadb
Merge remote-tracking branch 'origin/aoi_events' into aoi_events
izaskr Oct 24, 2024
a6467e3
Merge branch 'main' of https://github.com/aeye-lab/pymovements into a…
izaskr Oct 25, 2024
07dee8a
Merge branch 'aoi_events' of https://github.com/aeye-lab/pymovements …
izaskr Oct 25, 2024
772987d
Merge branch 'aoi_events' of https://github.com/aeye-lab/pymovements …
izaskr Oct 25, 2024
019bd0d
Merge branch 'aoi_events' of https://github.com/aeye-lab/pymovements …
izaskr Oct 25, 2024
eef4d66
Merge branch 'aoi_events' of https://github.com/aeye-lab/pymovements …
izaskr Oct 25, 2024
652d0b0
Merge branch 'aoi_events' of https://github.com/aeye-lab/pymovements …
izaskr Oct 25, 2024
9625ab1
return TextStimulus instead of pl.DataFrame
SiQube Dec 29, 2024
0865d30
Merge branch 'main' into aoi_events
SiQube Dec 29, 2024
66a9974
Merge branch 'main' into aoi_events
SiQube Jan 8, 2025
1776c7d
adjust parameter ids
dkrako Jan 8, 2025
73b7b76
allow for list of strings as `by` argument
dkrako Jan 8, 2025
bfafab9
sequence not collection
dkrako Jan 8, 2025
32 changes: 32 additions & 0 deletions src/pymovements/stimulus/text.py
@@ -20,6 +20,7 @@
"""Module for the TextDataFrame."""
from __future__ import annotations

from collections.abc import Sequence
from pathlib import Path
from typing import Any

@@ -80,6 +81,37 @@ def __init__(
self.end_y_column = end_y_column
self.page_column = page_column

def split(
self,
by: str | Sequence[str],
) -> list[TextStimulus]:
"""Split the AOI df.

Parameters
----------
by: str | Sequence[str]
Splitting criteria.

Returns
-------
list[TextStimulus]
A list of TextStimulus objects.
"""
return [
TextStimulus(
aois=df,
aoi_column=self.aoi_column,
width_column=self.width_column,
height_column=self.height_column,
start_x_column=self.start_x_column,
start_y_column=self.start_y_column,
end_x_column=self.end_x_column,
end_y_column=self.end_y_column,
page_column=self.page_column,
)
for df in self.aois.partition_by(by=by, as_dict=False)
]


def from_file(
aoi_path: str | Path,
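For context, here is a minimal usage sketch of the new `split` method above. The file path and the `line_idx` column are borrowed from the tests below and are only illustrative:

import pymovements as pm

stimulus = pm.stimulus.text.from_file(
    'tests/files/toy_text_1_1_aoi.csv',
    aoi_column='char',
    start_x_column='top_left_x',
    start_y_column='top_left_y',
    width_column='width',
    height_column='height',
    page_column='page',
)

# One TextStimulus per unique value of 'line_idx' in the AOI dataframe.
lines = stimulus.split(by='line_idx')
print(len(lines))  # the toy file covers two lines, so this prints 2 (see the tests below)

Each returned TextStimulus keeps the original column configuration (aoi_column, width_column, and so on), as set in the constructor call inside the list comprehension above.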
100 changes: 99 additions & 1 deletion tests/unit/stimulus/text_test.py
@@ -187,7 +187,7 @@
Path('tests/files/toy_text_1_1_aoi.csv'),
{'separator': ','},
EXPECTED_DF,
id='toy_text_1_1_aoi',
id='toy_text_1_1_aoi_sep',
),
],
)
@@ -226,3 +226,101 @@ def test_text_stimulus_unsupported_format():
expected = 'unsupported file format ".pickle".Supported formats are: '\
'[\'.csv\', \'.ias\', \'.tsv\', \'.txt\']'
assert msg == expected


@pytest.mark.parametrize(
('aoi_file', 'custom_read_kwargs'),
[
pytest.param(
'tests/files/toy_text_1_1_aoi.csv',
None,
id='toy_text_1_1_aoi',
),
pytest.param(
Path('tests/files/toy_text_1_1_aoi.csv'),
{'separator': ','},
id='toy_text_1_1_aoi_sep',
),
],
)
def test_text_stimulus_splitting(aoi_file, custom_read_kwargs):
aois_df = pm.stimulus.text.from_file(
aoi_file,
aoi_column='char',
start_x_column='top_left_x',
start_y_column='top_left_y',
width_column='width',
height_column='height',
page_column='page',
custom_read_kwargs=custom_read_kwargs,
)

aois_df = aois_df.split(by='line_idx')
assert len(aois_df) == 2


@pytest.mark.parametrize(
('aoi_file', 'custom_read_kwargs'),
[
pytest.param(
'tests/files/toy_text_1_1_aoi.csv',
None,
id='toy_text_1_1_aoi',
),
pytest.param(
Path('tests/files/toy_text_1_1_aoi.csv'),
{'separator': ','},
id='toy_text_1_1_aoi_sep',
),
],
)
def test_text_stimulus_splitting_unique_within(aoi_file, custom_read_kwargs):
aois_df = pm.stimulus.text.from_file(
aoi_file,
aoi_column='char',
start_x_column='top_left_x',
start_y_column='top_left_y',
width_column='width',
height_column='height',
page_column='page',
custom_read_kwargs=custom_read_kwargs,
)

aois_df = aois_df.split(by='line_idx')
assert all(df.aois.n_unique(subset=['line_idx']) == 1 for df in aois_df)


@pytest.mark.parametrize(
('aoi_file', 'custom_read_kwargs'),
[
pytest.param(
'tests/files/toy_text_1_1_aoi.csv',
None,
id='toy_text_1_1_aoi',
),
pytest.param(
Path('tests/files/toy_text_1_1_aoi.csv'),
{'separator': ','},
id='toy_text_1_1_aoi_sep',
),
],
)
def test_text_stimulus_splitting_different_between(aoi_file, custom_read_kwargs):
aois_df = pm.stimulus.text.from_file(
aoi_file,
aoi_column='char',
start_x_column='top_left_x',
start_y_column='top_left_y',
width_column='width',
height_column='height',
page_column='page',
custom_read_kwargs=custom_read_kwargs,
)

aois_df = aois_df.split(by='line_idx')
unique_values = []
for df in aois_df:
unique_value = df.aois.unique(subset=['line_idx'])['line_idx'].to_list()
unique_values.extend(unique_value)

assert len(unique_values) == len(set(unique_values))
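Commit 73b7b76 additionally allows a sequence of column names as the `by` argument, which is forwarded to polars' `DataFrame.partition_by`. A hedged sketch building on the `stimulus` object from the earlier example; the `page` column is configured in the toy AOI file per the tests, and combining it with `line_idx` is purely illustrative:

# Split by the combination of page and line index: each resulting
# TextStimulus contains the AOIs of exactly one (page, line_idx) group.
per_page_line = stimulus.split(by=['page', 'line_idx'])
assert all(s.aois.n_unique(subset=['page', 'line_idx']) == 1 for s in per_page_line)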