diff --git a/src/pymovements/stimulus/text.py b/src/pymovements/stimulus/text.py
index 3d184006..26935f66 100644
--- a/src/pymovements/stimulus/text.py
+++ b/src/pymovements/stimulus/text.py
@@ -80,6 +80,38 @@ def __init__(
         self.end_y_column = end_y_column
         self.page_column = page_column
 
+    def split(
+            self,
+            by: str,
+    ) -> list[TextStimulus]:
+        """Split the stimulus into one TextStimulus per unique value of a column.
+
+        Parameters
+        ----------
+        by: str
+            Name of the AOI dataframe column to partition on.
+
+        Returns
+        -------
+        list[TextStimulus]
+            A list of TextStimulus objects, one per partition, each created with
+            the same column names as this stimulus.
+        """
+        return [
+            TextStimulus(
+                aois=partition,
+                aoi_column=self.aoi_column,
+                start_x_column=self.start_x_column,
+                start_y_column=self.start_y_column,
+                width_column=self.width_column,
+                height_column=self.height_column,
+                end_x_column=self.end_x_column,
+                end_y_column=self.end_y_column,
+                page_column=self.page_column,
+            )
+            for partition in self.aois.partition_by(by=by, as_dict=False)
+        ]
+
 
 def from_file(
         aoi_path: str | Path,
diff --git a/tests/unit/stimulus/text_test.py b/tests/unit/stimulus/text_test.py
index 2cf99033..c7860022 100644
--- a/tests/unit/stimulus/text_test.py
+++ b/tests/unit/stimulus/text_test.py
@@ -226,3 +226,102 @@ def test_text_stimulus_unsupported_format():
     expected = 'unsupported file format ".pickle".Supported formats are: '\
         '[\'.csv\', \'.ias\', \'.tsv\', \'.txt\']'
     assert msg == expected
+
+
+@pytest.mark.parametrize(
+    ('aoi_file', 'custom_read_kwargs'),
+    [
+        pytest.param(
+            'tests/files/toy_text_1_1_aoi.csv',
+            None,
+            id='toy_text_1_1_aoi',
+        ),
+        pytest.param(
+            Path('tests/files/toy_text_1_1_aoi.csv'),
+            {'separator': ','},
+            id='toy_text_1_1_aoi',
+        ),
+    ],
+)
+def test_text_stimulus_splitting(aoi_file, custom_read_kwargs):
+    stimulus = pm.stimulus.text.from_file(
+        aoi_file,
+        aoi_column='char',
+        start_x_column='top_left_x',
+        start_y_column='top_left_y',
+        width_column='width',
+        height_column='height',
+        page_column='page',
+        custom_read_kwargs=custom_read_kwargs,
+    )
+
+    stimuli = stimulus.split(by='line_idx')
+    assert len(stimuli) == 2
+    assert all(isinstance(part, pm.stimulus.text.TextStimulus) for part in stimuli)
+
+
+@pytest.mark.parametrize(
+    ('aoi_file', 'custom_read_kwargs'),
+    [
+        pytest.param(
+            'tests/files/toy_text_1_1_aoi.csv',
+            None,
+            id='toy_text_1_1_aoi',
+        ),
+        pytest.param(
+            Path('tests/files/toy_text_1_1_aoi.csv'),
+            {'separator': ','},
+            id='toy_text_1_1_aoi',
+        ),
+    ],
+)
+def test_text_stimulus_splitting_unique_within(aoi_file, custom_read_kwargs):
+    stimulus = pm.stimulus.text.from_file(
+        aoi_file,
+        aoi_column='char',
+        start_x_column='top_left_x',
+        start_y_column='top_left_y',
+        width_column='width',
+        height_column='height',
+        page_column='page',
+        custom_read_kwargs=custom_read_kwargs,
+    )
+
+    stimuli = stimulus.split(by='line_idx')
+    assert all(part.aois.n_unique(subset=['line_idx']) == 1 for part in stimuli)
+
+
+@pytest.mark.parametrize(
+    ('aoi_file', 'custom_read_kwargs'),
+    [
+        pytest.param(
+            'tests/files/toy_text_1_1_aoi.csv',
+            None,
+            id='toy_text_1_1_aoi',
+        ),
+        pytest.param(
+            Path('tests/files/toy_text_1_1_aoi.csv'),
+            {'separator': ','},
+            id='toy_text_1_1_aoi',
+        ),
+    ],
+)
+def test_text_stimulus_splitting_different_between(aoi_file, custom_read_kwargs):
+    stimulus = pm.stimulus.text.from_file(
+        aoi_file,
+        aoi_column='char',
+        start_x_column='top_left_x',
+        start_y_column='top_left_y',
+        width_column='width',
+        height_column='height',
+        page_column='page',
+        custom_read_kwargs=custom_read_kwargs,
+    )
+
+    stimuli = stimulus.split(by='line_idx')
+    unique_values = []
+    for part in stimuli:
+        unique_value = part.aois.unique(subset=['line_idx'])['line_idx'].to_list()
+        unique_values.extend(unique_value)
+
+    assert len(unique_values) == len(set(unique_values))