From 5a790a0c9fe43c53116709c0de3aaddec2363211 Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Thu, 21 Sep 2023 15:02:37 +0200
Subject: [PATCH 01/20] Added functionality to remove single node top-level
 directories in datasets

---
 src/pymovements/dataset/dataset.py          | 10 +++++++++-
 src/pymovements/dataset/dataset_download.py |  4 ++++
 src/pymovements/utils/archives.py           | 13 ++++++++++++-
 tests/utils/archives_test.py                | 15 +++++++++++++++
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/pymovements/dataset/dataset.py b/src/pymovements/dataset/dataset.py
index 987d39a2f..0077f9d14 100644
--- a/src/pymovements/dataset/dataset.py
+++ b/src/pymovements/dataset/dataset.py
@@ -729,13 +729,20 @@ def download(
         )
         return self
 
-    def extract(self, remove_finished: bool = False, verbose: int = 1) -> Dataset:
+    def extract(
+            self,
+            remove_finished: bool = False,
+            remove_top_level: bool = True,
+            verbose: int = 1,
+    ) -> Dataset:
         """Extract downloaded dataset archive files.
 
         Parameters
         ----------
         remove_finished : bool
             Remove archive files after extraction.
+        remove_top_level : bool
+            If ``True``, remove the top-level directory if it has only one child.
         verbose : int
             Verbosity levels: (1) Print messages for extracting each dataset resource without
             printing messages for recursive archives. (2) Print additional messages for each
@@ -750,6 +757,7 @@ def extract(self, remove_finished: bool = False, verbose: int = 1) -> Dataset:
             definition=self.definition,
             paths=self.paths,
             remove_finished=remove_finished,
+            remove_top_level=remove_top_level,
             verbose=verbose,
         )
         return self
diff --git a/src/pymovements/dataset/dataset_download.py b/src/pymovements/dataset/dataset_download.py
index be0d8512f..427548446 100644
--- a/src/pymovements/dataset/dataset_download.py
+++ b/src/pymovements/dataset/dataset_download.py
@@ -122,6 +122,7 @@ def extract_dataset(
         definition: DatasetDefinition,
         paths: DatasetPaths,
         remove_finished: bool = False,
+        remove_top_level: bool = True,
         verbose: int = 1,
 ) -> None:
     """Extract downloaded dataset archive files.
@@ -134,6 +135,8 @@ def extract_dataset(
         The dataset paths.
     remove_finished : bool
         Remove archive files after extraction.
+    remove_top_level : bool
+        If ``True``, remove the top-level directory if it has only one child.
     verbose:
         Verbosity levels: (1) Print messages for extracting each dataset resource without printing
         messages for recursive archives. (2) Print messages for extracting each dataset resource and
@@ -150,5 +153,6 @@ def extract_dataset(
             destination_path=destination_path,
             recursive=True,
             remove_finished=remove_finished,
+            remove_top_level=remove_top_level,
             verbose=verbose,
         )
diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index 53f53c0c7..93994fa24 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -23,6 +23,8 @@
 import bz2
 import gzip
 import lzma
+import os
+import shutil
 import tarfile
 import zipfile
 from collections.abc import Callable
@@ -37,6 +39,7 @@ def extract_archive(
         destination_path: Path | None = None,
         recursive: bool = True,
         remove_finished: bool = False,
+        remove_top_level: bool = True,
         verbose: int = 1,
 ) -> Path:
     """Extract an archive.
@@ -55,11 +58,12 @@ def extract_archive(
         Recursively extract archives which are included in extracted archive.
     remove_finished : bool
         If ``True``, remove the file after the extraction.
+    remove_top_level : bool
+        If ``True``, remove the top-level directory if it has only one child.
     verbose:
         Verbosity levels: (1) Print messages for extracting each dataset resource without printing
         messages for recursive archives. (2) Print additional messages for each recursive archive
         extract.
-
     Returns
     -------
     Path :
@@ -110,6 +114,13 @@ def extract_archive(
                 verbose=0 if verbose < 2 else 2,
             )
 
+    if remove_top_level:
+        # Check if top-level directory has a single child
+        if len([f.path for f in os.scandir(destination_path)]) == 1:
+            single_child = [f.path for f in os.scandir(destination_path)][0]
+            shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
+            shutil.rmtree(single_child)
+
     return destination_path
 
 
diff --git a/tests/utils/archives_test.py b/tests/utils/archives_test.py
index 1f20818a1..96de4477c 100644
--- a/tests/utils/archives_test.py
+++ b/tests/utils/archives_test.py
@@ -296,3 +296,18 @@ def test_decompress_unknown_compression_suffix():
         _decompress(pathlib.Path('test.zip.zip'))
     msg, = excinfo.value.args
     assert msg == "Couldn't detect a compression from suffix .zip."
+
+
+@pytest.mark.parametrize(
+    'remove_top_level',
+    [
+        pytest.param(False, id='remove_top_level_false'),
+        pytest.param(True, id='remove_top_level_true'),
+    ],
+)
+def test_remove_top_level(remove_top_level, archive):
+    extract_archive(
+        source_path=archive,
+        destination_path=None,
+        remove_top_level=remove_top_level,
+    )

From 6fe1973c882b89c86265dca1240e790d8568484e Mon Sep 17 00:00:00 2001
From: "David R. Reich" <43832476+SiQube@users.noreply.github.com>
Date: Fri, 22 Sep 2023 01:28:34 -0400
Subject: [PATCH 02/20] fix: restrict to matplotlib<3.8 until we fixed typing
 (#561)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d50f1b34e..d157d537f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
   "Typing :: Typed"
 ]
 dependencies = [
-  "matplotlib>=3.0.0,<4",
+  "matplotlib>=3.0.0,<3.8",
   "numpy>=1.10.0,<2",
   "pandas>1.0.0,<3",
   "polars>=0.19.0,<0.20.0",

From 0af0bf527596fa8667c05219fe1e2a2965774cb2 Mon Sep 17 00:00:00 2001
From: prassepaul <prasse.paul@googlemail.com>
Date: Fri, 22 Sep 2023 07:45:45 +0200
Subject: [PATCH 03/20] feat: Add gaze.from_csv() (#557)

---
 src/pymovements/gaze/__init__.py          |   2 +
 src/pymovements/gaze/io.py                | 160 ++++++++++++++++++++++
 tests/gaze/io/csv_test.py                 |  52 +++++++
 tests/gaze/io/files/binocular_example.csv |  11 ++
 tests/gaze/io/files/monocular_example.csv |  11 ++
 5 files changed, 236 insertions(+)
 create mode 100644 src/pymovements/gaze/io.py
 create mode 100644 tests/gaze/io/csv_test.py
 create mode 100644 tests/gaze/io/files/binocular_example.csv
 create mode 100644 tests/gaze/io/files/monocular_example.csv

diff --git a/src/pymovements/gaze/__init__.py b/src/pymovements/gaze/__init__.py
index b8bcec64d..0e2ceed5a 100644
--- a/src/pymovements/gaze/__init__.py
+++ b/src/pymovements/gaze/__init__.py
@@ -62,6 +62,7 @@
 from pymovements.gaze.gaze_dataframe import GazeDataFrame
 from pymovements.gaze.integration import from_numpy
 from pymovements.gaze.integration import from_pandas
+from pymovements.gaze.io import from_csv
 from pymovements.gaze.screen import Screen
 
 
@@ -73,4 +74,5 @@
     'Screen',
     'transforms_numpy',
     'transforms',
+    'from_csv',
 ]
diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py
new file mode 100644
index 000000000..81bd391f7
--- /dev/null
+++ b/src/pymovements/gaze/io.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2023 The pymovements Project Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""Functionality to load GazeDataFrame from a csv file."""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import polars as pl
+
+from pymovements.gaze import Experiment  # pylint: disable=cyclic-import
+from pymovements.gaze.gaze_dataframe import GazeDataFrame  # pylint: disable=cyclic-import
+
+
+def from_csv(
+        file: str | Path,
+        experiment: Experiment | None = None,
+        *,
+        trial_columns: list[str] | None = None,
+        time_column: str | None = None,
+        pixel_columns: list[str] | None = None,
+        position_columns: list[str] | None = None,
+        velocity_columns: list[str] | None = None,
+        acceleration_columns: list[str] | None = None,
+        **read_csv_kwargs: Any,
+) -> GazeDataFrame:
+    """Load a CSV file into a :py:class:`pymovements.gaze.gaze_dataframe.GazeDataFrame`.
+
+    Parameters
+    ----------
+    file:
+        Path of gaze file.
+    experiment : Experiment
+        The experiment definition.
+    trial_columns:
+        The name of the trial columns in the input data frame. If the list is empty or None,
+        the input data frame is assumed to contain only one trial. If the list is not empty,
+        the input data frame is assumed to contain multiple trials and the transformation
+        methods will be applied to each trial separately.
+    time_column:
+        The name of the timestamp column in the input data frame.
+    pixel_columns:
+        The name of the pixel position columns in the input data frame. These columns will be
+        nested into the column ``pixel``. If the list is empty or None, the nested ``pixel``
+        column will not be created.
+    position_columns:
+        The name of the dva position columns in the input data frame. These columns will be
+        nested into the column ``position``. If the list is empty or None, the nested
+        ``position`` column will not be created.
+    velocity_columns:
+        The name of the velocity columns in the input data frame. These columns will be nested
+        into the column ``velocity``. If the list is empty or None, the nested ``velocity``
+        column will not be created.
+    acceleration_columns:
+        The name of the acceleration columns in the input data frame. These columns will be
+        nested into the column ``acceleration``. If the list is empty or None, the nested
+        ``acceleration`` column will not be created.
+    **read_csv_kwargs:
+        Additional keyword arguments to be passed to :py:func:`polars.read_csv`.
+
+    Notes
+    -----
+    About using the arguments ``pixel_columns``, ``position_columns``, ``velocity_columns``,
+    and ``acceleration_columns``:
+
+    By passing a list of columns as any of these arguments, these columns will be merged into a
+    single column with the corresponding name, e.g. using `pixel_columns` will merge the
+    respective columns into the column `pixel`.
+
+    The supported number of component columns with the expected order are:
+
+    * zero columns: No nested component column will be created.
+    * two columns: monocular data; expected order: x-component, y-component
+    * four columns: binocular data; expected order: x-component left eye, y-component left eye,
+      x-component right eye, y-component right eye
+    * six columns: binocular data with additional cyclopian data; expected order: x-component
+      left eye, y-component left eye, x-component right eye, y-component right eye,
+      x-component cyclopian eye, y-component cyclopian eye
+
+
+    Examples
+    --------
+    First let's assume a CSV file stored at `tests/gaze/io/files/monocular_example.csv`
+    with the following content:
+    shape: (10, 3)
+    ┌──────┬────────────┬────────────┐
+    │ time ┆ x_left_pix ┆ y_left_pix │
+    │ ---  ┆ ---        ┆ ---        │
+    │ i64  ┆ i64        ┆ i64        │
+    ╞══════╪════════════╪════════════╡
+    │ 0    ┆ 0          ┆ 0          │
+    │ 0    ┆ 0          ┆ 0          │
+    │ 0    ┆ 0          ┆ 0          │
+    │ 0    ┆ 0          ┆ 0          │
+    │ …    ┆ …          ┆ …          │
+    │ 0    ┆ 0          ┆ 0          │
+    │ 0    ┆ 0          ┆ 0          │
+    │ 0    ┆ 0          ┆ 0          │
+    │ 0    ┆ 0          ┆ 0          │
+    └──────┴────────────┴────────────┘
+
+    We can now load the data into a ``GazeDataFrame`` by specifying the name of the time column
+    and the names of the pixel position columns.
+
+    >>> from pymovements.gaze.io import from_csv
+    >>> gaze = from_csv(
+    ...     file='tests/gaze/io/files/monocular_example.csv',
+    ...     time_column = 'time',
+    ...     pixel_columns = ['x_left_pix','y_left_pix'],)
+    >>> gaze.frame
+    shape: (10, 2)
+    ┌──────┬───────────┐
+    │ time ┆ pixel     │
+    │ ---  ┆ ---       │
+    │ i64  ┆ list[i64] │
+    ╞══════╪═══════════╡
+    │ 0    ┆ [0, 0]    │
+    │ 0    ┆ [0, 0]    │
+    │ 0    ┆ [0, 0]    │
+    │ 0    ┆ [0, 0]    │
+    │ …    ┆ …         │
+    │ 0    ┆ [0, 0]    │
+    │ 0    ┆ [0, 0]    │
+    │ 0    ┆ [0, 0]    │
+    │ 0    ┆ [0, 0]    │
+    └──────┴───────────┘
+
+    """
+    # read data
+    gaze_data = pl.read_csv(file, **read_csv_kwargs)
+
+    # create gaze data frame
+    gaze_df = GazeDataFrame(
+        gaze_data,
+        experiment=experiment,
+        trial_columns=trial_columns,
+        time_column=time_column,
+        pixel_columns=pixel_columns,
+        position_columns=position_columns,
+        velocity_columns=velocity_columns,
+        acceleration_columns=acceleration_columns,
+    )
+    return gaze_df
diff --git a/tests/gaze/io/csv_test.py b/tests/gaze/io/csv_test.py
new file mode 100644
index 000000000..cd5e35abd
--- /dev/null
+++ b/tests/gaze/io/csv_test.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2023 The pymovements Project Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""Test read from csv."""
+import pytest
+
+import pymovements as pm
+
+
+@pytest.mark.parametrize(
+    ('kwargs', 'shape'),
+    [
+        pytest.param(
+            {
+                'file': 'tests/gaze/io/files/monocular_example.csv',
+                'time_column': 'time', 'pixel_columns': ['x_left_pix', 'y_left_pix'],
+            },
+            (10, 2),
+            id='csv_mono_shape',
+        ),
+        pytest.param(
+            {
+                'file': 'tests/gaze/io/files/binocular_example.csv',
+                'time_column': 'time',
+                'pixel_columns': ['x_left_pix', 'y_left_pix', 'x_right_pix', 'y_right_pix'],
+                'position_columns': ['x_left_pos', 'y_left_pos', 'x_right_pos', 'y_right_pos'],
+            },
+            (10, 3),
+            id='csv_bino_shape',
+        ),
+    ],
+)
+def test_shapes(kwargs, shape):
+    gaze_dataframe = pm.gaze.from_csv(**kwargs)
+
+    assert gaze_dataframe.frame.shape == shape
diff --git a/tests/gaze/io/files/binocular_example.csv b/tests/gaze/io/files/binocular_example.csv
new file mode 100644
index 000000000..ddfd55ab4
--- /dev/null
+++ b/tests/gaze/io/files/binocular_example.csv
@@ -0,0 +1,11 @@
+time,x_left_pix,y_left_pix,x_right_pix,y_right_pix,x_left_pos,y_left_pos,x_right_pos,y_right_pos
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
diff --git a/tests/gaze/io/files/monocular_example.csv b/tests/gaze/io/files/monocular_example.csv
new file mode 100644
index 000000000..5493f10e4
--- /dev/null
+++ b/tests/gaze/io/files/monocular_example.csv
@@ -0,0 +1,11 @@
+time,x_left_pix,y_left_pix
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0
+0,0,0

From fbd6d37ef972c8ec78bfe3460886dcb2caba8abc Mon Sep 17 00:00:00 2001
From: "Daniel G. Krakowczyk" <daniel.krakowczyk@uni-potsdam.de>
Date: Fri, 22 Sep 2023 08:02:36 +0200
Subject: [PATCH 04/20] feat: Add GazeDataFrame.apply() (#558)

---
 src/pymovements/events/detection/_library.py |  16 ++
 src/pymovements/gaze/gaze_dataframe.py       |  21 ++
 src/pymovements/gaze/transforms.py           |  16 ++
 tests/gaze/apply_test.py                     | 236 +++++++++++++++++++
 4 files changed, 289 insertions(+)
 create mode 100644 tests/gaze/apply_test.py

diff --git a/src/pymovements/events/detection/_library.py b/src/pymovements/events/detection/_library.py
index 6457bd3dc..9585f0418 100644
--- a/src/pymovements/events/detection/_library.py
+++ b/src/pymovements/events/detection/_library.py
@@ -58,6 +58,22 @@ def get(cls, name: str) -> Callable[..., EventDataFrame]:
         """
         return cls.methods[name]
 
+    @classmethod
+    def __contains__(cls, name: str) -> bool:
+        """Check if class contains method of given name.
+
+        Parameters
+        ----------
+        name: str
+            Name of the method to check.
+
+        Returns
+        -------
+        bool
+            True if EventDetectionLibrary contains method with given name, else False.
+        """
+        return name in cls.methods
+
 
 def register_event_detection(
         method: Callable[..., EventDataFrame],
diff --git a/src/pymovements/gaze/gaze_dataframe.py b/src/pymovements/gaze/gaze_dataframe.py
index 29e53a50a..36f0f20f3 100644
--- a/src/pymovements/gaze/gaze_dataframe.py
+++ b/src/pymovements/gaze/gaze_dataframe.py
@@ -224,6 +224,27 @@ def __init__(
         else:
             self.events = events.copy()
 
+    def apply(
+            self,
+            function: str,
+            **kwargs: Any,
+    ) -> None:
+        """Apply preprocessing method to GazeDataFrame.
+
+        Parameters
+        ----------
+        function: str
+            Name of the preprocessing method to apply.
+        kwargs:
+            kwargs that will be forwarded when calling the preprocessing method.
+        """
+        if transforms.TransformLibrary.__contains__(function):
+            self.transform(function, **kwargs)
+        elif pm.events.EventDetectionLibrary.__contains__(function):
+            self.detect(function, **kwargs)
+        else:
+            raise ValueError(f"unsupported method '{function}'")
+
     def transform(
             self,
             transform_method: str | Callable[..., pl.Expr],
diff --git a/src/pymovements/gaze/transforms.py b/src/pymovements/gaze/transforms.py
index ebef4c02f..75f7fb779 100644
--- a/src/pymovements/gaze/transforms.py
+++ b/src/pymovements/gaze/transforms.py
@@ -68,6 +68,22 @@ def get(cls, name: str) -> Callable[..., pl.Expr]:
         """
         return cls.methods[name]
 
+    @classmethod
+    def __contains__(cls, name: str) -> bool:
+        """Check if class contains method of given name.
+
+        Parameters
+        ----------
+        name: str
+            Name of the method to check.
+
+        Returns
+        -------
+        bool
+            True if TransformLibrary contains method with given name, else False.
+        """
+        return name in cls.methods
+
 
 def register_transform(method: TransformMethod) -> TransformMethod:
     """Register a transform method."""
diff --git a/tests/gaze/apply_test.py b/tests/gaze/apply_test.py
new file mode 100644
index 000000000..44d6d787e
--- /dev/null
+++ b/tests/gaze/apply_test.py
@@ -0,0 +1,236 @@
+# Copyright (c) 2023 The pymovements Project Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""Test GazeDataFrame apply method."""
+import numpy as np
+import polars as pl
+import pytest
+from polars.testing import assert_frame_equal
+
+import pymovements as pm
+from pymovements.synthetic import step_function
+
+
+@pytest.mark.parametrize(
+    ('method', 'kwargs', 'gaze', 'expected'),
+    [
+        pytest.param(
+            'ivt',
+            {
+                'velocity_threshold': 1,
+                'minimum_duration': 2,
+                'eye': 'cyclops',
+            },
+            pm.gaze.from_numpy(
+                velocity=step_function(
+                    length=100, steps=[0, 10], values=[(1, 1, 1, 1, 0, 0), (0, 0, 0, 0, 0, 0)],
+                ),
+                orient='row',
+                experiment=pm.Experiment(1024, 768, 38, 30, 60, 'center', 10),
+            ),
+            pm.gaze.from_numpy(
+                velocity=step_function(
+                    length=100, steps=[0, 10], values=[(1, 1, 1, 1, 0, 0), (0, 0, 0, 0, 0, 0)],
+                ),
+                orient='row',
+                experiment=pm.Experiment(1024, 768, 38, 30, 60, 'center', 10),
+                events=pm.events.EventDataFrame(
+                    name='fixation',
+                    onsets=[0],
+                    offsets=[99],
+                ),
+            ),
+            id='ivt_constant_position_monocular_fixation_six_components_eye_cyclops',
+        ),
+
+        pytest.param(
+            'microsaccades',
+            {
+                'threshold': 1e-5,
+            },
+            pm.gaze.from_numpy(
+                velocity=step_function(
+                    length=100,
+                    steps=[20, 30, 70, 80],
+                    values=[(9, 9), (0, 0), (9, 9), (0, 0)],
+                    start_value=(0, 0),
+                ),
+                orient='row',
+                experiment=pm.Experiment(1024, 768, 38, 30, 60, 'center', 10),
+            ),
+            pm.gaze.from_numpy(
+                velocity=step_function(
+                    length=100,
+                    steps=[20, 30, 70, 80],
+                    values=[(9, 9), (0, 0), (9, 9), (0, 0)],
+                    start_value=(0, 0),
+                ),
+                orient='row',
+                experiment=pm.Experiment(1024, 768, 38, 30, 60, 'center', 10),
+                events=pm.EventDataFrame(
+                    name='saccade',
+                    onsets=[20, 70],
+                    offsets=[29, 79],
+                ),
+            ),
+            id='microsaccades_four_steps_two_saccades',
+        ),
+
+        pytest.param(
+            'fill',
+            {},
+            pm.gaze.from_numpy(
+                time=np.arange(0, 100),
+                events=pm.EventDataFrame(
+                    name=['fixation', 'saccade'], onsets=[0, 50], offsets=[40, 100],
+                ),
+            ),
+            pm.gaze.from_numpy(
+                time=np.arange(0, 100),
+                events=pm.EventDataFrame(
+                    name=['fixation', 'saccade', 'unclassified'],
+                    onsets=[0, 50, 40],
+                    offsets=[40, 100, 49],
+                ),
+            ),
+            id='fill_fixation_10_ms_break_then_saccade_until_end_single_fill',
+        ),
+
+        pytest.param(
+            'downsample',
+            {'factor': 2},
+            pm.GazeDataFrame(
+                data=pl.from_dict(
+                    {
+                        'time': np.arange(1000, 1010, 1),
+                        'x_pix': np.arange(0, 1, 0.1),
+                        'y_pix': np.arange(20, 21, 0.1),
+                    },
+                ),
+                pixel_columns=['x_pix', 'y_pix'],
+            ),
+            pm.GazeDataFrame(
+                data=pl.from_dict(
+                    {
+                        'time': np.arange(1000, 1010, 2),
+                        'x_pix': np.arange(0, 1, 0.2),
+                        'y_pix': [20.0, 20.2, 20.4, 20.6, 20.8],
+                    },
+                ),
+                pixel_columns=['x_pix', 'y_pix'],
+            ),
+            id='downsample_factor_2',
+        ),
+
+        pytest.param(
+            'pix2deg',
+            {},
+            pm.GazeDataFrame(
+                data=pl.from_dict(
+                    {
+                        'time': [1000, 1000],
+                        'x_pix': [(100 - 1) / 2, (100 - 1) / 2],
+                        'y_pix': [0.0, 0.0],
+                    },
+                ),
+                experiment=pm.Experiment(100, 100, 100, 100, 100, 'center', 1000),
+                pixel_columns=['x_pix', 'y_pix'],
+            ),
+            pm.GazeDataFrame(
+                data=pl.from_dict(
+                    {
+                        'time': [1000, 1000],
+                        'x_pix': [49.5, 49.5],
+                        'y_pix': [0.0, 0.0],
+                        'x_dva': [26.3354, 26.3354],
+                        'y_dva': [0.0, 0.0],
+                    },
+                ),
+                pixel_columns=['x_pix', 'y_pix'],
+                position_columns=['x_dva', 'y_dva'],
+            ),
+            id='pix2deg_origin_center',
+        ),
+
+        pytest.param(
+            'pos2vel',
+            {'method': 'preceding'},
+            pm.GazeDataFrame(
+                data=pl.from_dict(
+                    {
+                        'trial_id': [1, 1, 1, 2, 2, 2],
+                        'time': [1000, 1001, 1002, 1003, 1004, 1005],
+                        'x': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+                        'y': [1.0, 1.1, 1.2, 1.0, 1.1, 1.2],
+                    },
+                ),
+                experiment=pm.Experiment(100, 100, 100, 100, 100, 'center', 1000),
+                trial_columns='trial_id',
+                position_columns=['x', 'y'],
+            ),
+            pm.GazeDataFrame(
+                data=pl.from_dict(
+                    {
+                        'trial_id': [1, 1, 1, 2, 2, 2],
+                        'time': [1000, 1001, 1002, 1003, 1004, 1005],
+                        'x_dva': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+                        'y_dva': [1.0, 1.1, 1.2, 1.0, 1.1, 1.2],
+                        'x_vel': [None, 0.0, 0.0, None, 0.0, 0.0],
+                        'y_vel': [None, 100.0, 100.0, None, 100.0, 100.0],
+                    },
+                ),
+                position_columns=['x_dva', 'y_dva'],
+                velocity_columns=['x_vel', 'y_vel'],
+            ),
+            id='pos2vel_preceding_trialize_single_column_str',
+        ),
+    ],
+)
+def test_gaze_apply(method, kwargs, gaze, expected):
+    gaze.apply(method, **kwargs)
+    assert_frame_equal(gaze.frame, expected.frame)
+    assert_frame_equal(gaze.events.frame, expected.events.frame)
+
+
+@pytest.mark.parametrize(
+    ('method', 'kwargs', 'gaze', 'exception', 'exception_msg'),
+    [
+        pytest.param(
+            'foobar',
+            {},
+            pm.gaze.from_numpy(
+                velocity=step_function(
+                    length=100, steps=[0, 10], values=[(1, 1, 1, 1, 0, 0), (0, 0, 0, 0, 0, 0)],
+                ),
+                orient='row',
+                experiment=pm.Experiment(1024, 768, 38, 30, 60, 'center', 10),
+            ),
+            ValueError,
+            "unsupported method 'foobar'",
+            id='unknown_method',
+        ),
+
+    ],
+)
+def test_gaze_apply_raises_exception(method, kwargs, gaze, exception, exception_msg):
+    with pytest.raises(exception) as exc_info:
+        gaze.apply(method, **kwargs)
+
+    msg, = exc_info.value.args
+    assert msg == exception_msg

From 9abc0c3b1fa18a73605bc8ffb6784e6df9042185 Mon Sep 17 00:00:00 2001
From: "David R. Reich" <43832476+SiQube@users.noreply.github.com>
Date: Fri, 22 Sep 2023 02:25:50 -0400
Subject: [PATCH 05/20] docs: add pip installation of pylint to CONTRIBUTING.md
 (#556)

---
 CONTRIBUTING.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 040727e4e..0a5122d1c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -131,6 +131,7 @@ You can check your code style by using [pre-commit](https://www.pre-commit.com).
 
 ```bash
 pip install pre-commit
+pip install pylint
 pre-commit install
 ```
 

From 7730fe4ec70e6dcfcf59f5b06dd0cbe611ebfe95 Mon Sep 17 00:00:00 2001
From: "David R. Reich" <43832476+SiQube@users.noreply.github.com>
Date: Fri, 22 Sep 2023 03:01:30 -0400
Subject: [PATCH 06/20] ci: remove unused pre-commit env from tox.ini (#564)

---
 tox.ini | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tox.ini b/tox.ini
index 973f1e4e2..50571bb92 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,7 +10,6 @@ envlist =
     flake8
     docs
     mypy
-    pre-commit
     coverage
 
 
@@ -77,11 +76,6 @@ commands =
     --disable=missing-class-docstring,missing-function-docstring,protected-access {toxinidir}/tests
 
 
-[testenv:pre-commit]
-deps = pre-commit
-commands = pre-commit run --all-files --show-diff-on-failure
-
-
 [testenv:mypy]
 changedir = {toxinidir}
 deps =

From ea00c4c5b74c2a9d66b6d58660f1d3df38a79893 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 22 Sep 2023 09:28:39 +0200
Subject: [PATCH 07/20] ci: pre-commit autoupdate (#566)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/asottile/pyupgrade: v3.10.1 → v3.11.0](https://github.com/asottile/pyupgrade/compare/v3.10.1...v3.11.0)
- [github.com/asottile/reorder-python-imports: v3.10.0 → v3.11.0](https://github.com/asottile/reorder-python-imports/compare/v3.10.0...v3.11.0)
- [github.com/asottile/reorder-python-imports: v3.10.0 → v3.11.0](https://github.com/asottile/reorder-python-imports/compare/v3.10.0...v3.11.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 90b65b910..66545eacd 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,17 +14,17 @@ repos:
     -   id: add-trailing-comma
         args: [--py36-plus]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.10.1
+    rev: v3.11.0
     hooks:
     -   id: pyupgrade
         args: [--py38-plus]
 -   repo: https://github.com/asottile/reorder-python-imports
-    rev: v3.10.0
+    rev: v3.11.0
     hooks:
     -   id: reorder-python-imports
         args: [--application-directories=src, --py38-plus]
 -   repo: https://github.com/asottile/reorder-python-imports
-    rev: v3.10.0
+    rev: v3.11.0
     hooks:
     -   id: reorder-python-imports
         files: ^src/

From 2f4c3cc1ef37a25d44545d03a7a9f0e280a15fab Mon Sep 17 00:00:00 2001
From: prassepaul <prasse.paul@googlemail.com>
Date: Fri, 22 Sep 2023 09:51:31 +0200
Subject: [PATCH 08/20] feat: Add public dataset GazeOnFaces (#567)

Co-authored-by: prassepaul <paul.prasse@uni-potsdam.de>
---
 docs/source/bibliography.bib              |  11 ++
 src/pymovements/datasets/__init__.py      |   3 +
 src/pymovements/datasets/gaze_on_faces.py | 149 ++++++++++++++++++++++
 tests/datasets/datasets_test.py           |   2 +
 tests/datasets/gaze_on_faces_test.py      |  79 ++++++++++++
 5 files changed, 244 insertions(+)
 create mode 100644 src/pymovements/datasets/gaze_on_faces.py
 create mode 100644 tests/datasets/gaze_on_faces_test.py

diff --git a/docs/source/bibliography.bib b/docs/source/bibliography.bib
index 0992286e2..3a7ab8bbd 100644
--- a/docs/source/bibliography.bib
+++ b/docs/source/bibliography.bib
@@ -65,3 +65,14 @@ @article{GazeBaseVR
     journal = {Scientific Data},
     doi = {10.1038/s41597-023-02075-5},
 }
+
+@article{GazeOnFaces,
+  title={Face exploration dynamics differentiate men and women},
+  author={Coutrot, Antoine and Binetti, Nicola and Harrison, Charlotte and Mareschal, Isabelle and Johnston, Alan},
+  journal={Journal of vision},
+  volume={16},
+  number={14},
+  pages={16--16},
+  year={2016},
+  publisher={The Association for Research in Vision and Ophthalmology}
+}
diff --git a/src/pymovements/datasets/__init__.py b/src/pymovements/datasets/__init__.py
index b1eb00ee2..75597101c 100644
--- a/src/pymovements/datasets/__init__.py
+++ b/src/pymovements/datasets/__init__.py
@@ -27,6 +27,7 @@
 
     pymovements.datasets.GazeBase
     pymovements.datasets.GazeBaseVR
+    pymovements.datasets.GazeOnFaces
     pymovements.datasets.JuDo1000
 
 
@@ -39,6 +40,7 @@
     pymovements.datasets.ToyDataset
     pymovements.datasets.ToyDatasetEyeLink
 """
+from pymovements.datasets.gaze_on_faces import GazeOnFaces
 from pymovements.datasets.gazebase import GazeBase
 from pymovements.datasets.gazebasevr import GazeBaseVR
 from pymovements.datasets.judo1000 import JuDo1000
@@ -49,6 +51,7 @@
 __all__ = [
     'GazeBase',
     'GazeBaseVR',
+    'GazeOnFaces',
     'JuDo1000',
     'ToyDataset',
     'ToyDatasetEyeLink',
diff --git a/src/pymovements/datasets/gaze_on_faces.py b/src/pymovements/datasets/gaze_on_faces.py
new file mode 100644
index 000000000..0a5795dfe
--- /dev/null
+++ b/src/pymovements/datasets/gaze_on_faces.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2022-2023 The pymovements Project Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""This module provides an interface to the GazeOnFaces dataset."""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from dataclasses import field
+from typing import Any
+
+import polars as pl
+
+from pymovements.dataset.dataset_definition import DatasetDefinition
+from pymovements.dataset.dataset_library import register_dataset
+from pymovements.gaze.experiment import Experiment
+
+
+@dataclass
+@register_dataset
+class GazeOnFaces(DatasetDefinition):
+    """GazeOnFaces dataset :cite:p:`GazeOnFaces`.
+
+    This dataset includes monocular eye tracking data from single participants in a single
+    session. Eye movements are recorded at a sampling frequency of 60 Hz
+    using an EyeLink 1000 video-based eye tracker and are provided as pixel coordinates.
+
+    Participants were sat 57 cm away from the screen (19inch LCD monitor,
+    screen res=1280×1024, 60 Hz). Recordings of the eye movements of one eye in monocular
+    pupil/corneal reflection tracking mode.
+
+    Check the respective paper for details :cite:p:`GazeOnFaces`.
+
+    Attributes
+    ----------
+    name : str
+        The name of the dataset.
+
+    mirrors : tuple[str, ...]
+        A tuple of mirrors of the dataset. Each entry must be of type `str` and end with a '/'.
+
+    resources : tuple[dict[str, str], ...]
+        A tuple of dataset resources. Each list entry must be a dictionary with the following keys:
+        - `resource`: The url suffix of the resource. This will be concatenated with the mirror.
+        - `filename`: The filename under which the file is saved.
+        - `md5`: The MD5 checksum of the respective file.
+
+    experiment : Experiment
+        The experiment definition.
+
+    filename_format : str
+        Regular expression which will be matched before trying to load the file. Named groups will
+        appear in the `fileinfo` dataframe.
+
+    filename_format_dtypes : dict[str, type], optional
+        If named groups are present in the `filename_format`, this makes it possible to cast
+        specific named groups to a particular datatype.
+
+    column_map : dict[str, str]
+        The keys are the columns to read, the values are the names to which they should be renamed.
+
+    custom_read_kwargs : dict[str, Any], optional
+        If specified, these keyword arguments will be passed to the file reading function.
+
+    Examples
+    --------
+    Initialize your :py:class:`~pymovements.Dataset` object with the
+    :py:class:`~pymovements.datasets.GazeOnFaces` definition:
+
+    >>> import pymovements as pm
+    >>>
+    >>> dataset = pm.Dataset("GazeOnFaces", path='data/GazeOnFaces')
+
+    Download the dataset resources:
+
+    >>> dataset.download()# doctest: +SKIP
+
+    Load the data into memory:
+
+    >>> dataset.load()# doctest: +SKIP
+    """
+
+    # pylint: disable=similarities
+    # The DatasetDefinition child classes potentially share code chunks for definitions.
+
+    name: str = 'GazeOnFaces'
+
+    mirrors: tuple[str, ...] = (
+        'https://uncloud.univ-nantes.fr/index.php/s/',
+    )
+
+    resources: tuple[dict[str, str], ...] = (
+        {
+            'resource': '8KW6dEdyBJqxpmo/download?path=%2F&files=gaze_csv.zip',
+            'filename': 'gaze_csv.zip',
+            'md5': 'fe219f07c9253cd9aaee6bd50233c034',
+        },
+    )
+
+    experiment: Experiment = Experiment(
+        screen_width_px=1280,
+        screen_height_px=1024,
+        screen_width_cm=38,
+        screen_height_cm=30,
+        distance_cm=57,
+        origin='center',
+        sampling_rate=60,
+    )
+
+    filename_format: str = r'gaze_sub{sub_id:d}_trial{trial_id:d}.csv'
+
+    filename_format_dtypes: dict[str, type] = field(
+        default_factory=lambda: {
+            'sub_id': int,
+            'trial_id': int,
+        },
+    )
+
+    trial_columns: list[str] = field(default_factory=lambda: ['sub_id', 'trial_id'])
+
+    time_column: Any = None
+
+    pixel_columns: list[str] = field(default_factory=lambda: ['x', 'y'])
+
+    column_map: dict[str, str] = field(default_factory=lambda: {})
+
+    custom_read_kwargs: dict[str, Any] = field(
+        default_factory=lambda: {
+            'separator': ',',
+            'has_header': False,
+            'new_columns': ['x', 'y'],
+            'dtypes': [pl.Float32, pl.Float32],
+        },
+    )
diff --git a/tests/datasets/datasets_test.py b/tests/datasets/datasets_test.py
index 6df6db7d0..074002c10 100644
--- a/tests/datasets/datasets_test.py
+++ b/tests/datasets/datasets_test.py
@@ -31,6 +31,7 @@
         pytest.param(pm.datasets.ToyDataset, 'ToyDataset', id='ToyDataset'),
         pytest.param(pm.datasets.GazeBase, 'GazeBase', id='GazeBase'),
         pytest.param(pm.datasets.GazeBaseVR, 'GazeBaseVR', id='GazeBaseVR'),
+        pytest.param(pm.datasets.GazeOnFaces, 'GazeOnFaces', id='GazeOnFaces'),
         pytest.param(pm.datasets.JuDo1000, 'JuDo1000', id='JuDo1000'),
     ],
 )
@@ -46,6 +47,7 @@ def test_public_dataset_registered(definition_class, dataset_name):
         pytest.param(pm.datasets.ToyDataset, id='ToyDataset'),
         pytest.param(pm.datasets.GazeBase, id='GazeBase'),
         pytest.param(pm.datasets.GazeBaseVR, id='GazeBaseVR'),
+        pytest.param(pm.datasets.GazeOnFaces, id='GazeOnFaces'),
         pytest.param(pm.datasets.JuDo1000, id='JuDo1000'),
     ],
 )
diff --git a/tests/datasets/gaze_on_faces_test.py b/tests/datasets/gaze_on_faces_test.py
new file mode 100644
index 000000000..86e28e444
--- /dev/null
+++ b/tests/datasets/gaze_on_faces_test.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2023 The pymovements Project Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""Test all functionality in pymovements.datasets.gaze_on_faces."""
+from pathlib import Path
+
+import pytest
+
+import pymovements as pm
+
+
+@pytest.mark.parametrize(
+    'init_path, expected_paths',
+    [
+        pytest.param(
+            '/data/set/path',
+            {
+                'root': Path('/data/set/path/'),
+                'dataset': Path('/data/set/path/'),
+                'download': Path('/data/set/path/downloads'),
+            },
+        ),
+        pytest.param(
+            pm.DatasetPaths(root='/data/set/path'),
+            {
+                'root': Path('/data/set/path/'),
+                'dataset': Path('/data/set/path/GazeOnFaces'),
+                'download': Path('/data/set/path/GazeOnFaces/downloads'),
+            },
+        ),
+        pytest.param(
+            pm.DatasetPaths(root='/data/set/path', dataset='.'),
+            {
+                'root': Path('/data/set/path/'),
+                'dataset': Path('/data/set/path/'),
+                'download': Path('/data/set/path/downloads'),
+            },
+        ),
+        pytest.param(
+            pm.DatasetPaths(root='/data/set/path', dataset='dataset'),
+            {
+                'root': Path('/data/set/path/'),
+                'dataset': Path('/data/set/path/dataset'),
+                'download': Path('/data/set/path/dataset/downloads'),
+            },
+        ),
+        pytest.param(
+            pm.DatasetPaths(root='/data/set/path', downloads='custom_downloads'),
+            {
+                'root': Path('/data/set/path/'),
+                'dataset': Path('/data/set/path/GazeOnFaces'),
+                'download': Path('/data/set/path/GazeOnFaces/custom_downloads'),
+            },
+        ),
+    ],
+)
+def test_paths(init_path, expected_paths):
+    dataset = pm.Dataset(pm.datasets.GazeOnFaces, path=init_path)
+
+    assert dataset.paths.root == expected_paths['root']
+    assert dataset.path == expected_paths['dataset']
+    assert dataset.paths.dataset == expected_paths['dataset']
+    assert dataset.paths.downloads == expected_paths['download']

From dea5439ff1d3ced98abb3cd2af9852037ebf756b Mon Sep 17 00:00:00 2001
From: "David R. Reich" <43832476+SiQube@users.noreply.github.com>
Date: Fri, 22 Sep 2023 08:21:35 -0400
Subject: [PATCH 09/20] chore: Migrate typing to matplotlib v3.8.0 (#563)

---
 .pre-commit-config.yaml                       |  2 +-
 pyproject.toml                                |  4 +-
 .../plotting/main_sequence_plot.py            |  6 +-
 src/pymovements/plotting/traceplot.py         | 88 ++++++++++++-------
 tests/plotting/main_sequence_plot_test.py     | 25 ++++++
 tests/plotting/traceplot_test.py              |  4 +
 tox.ini                                       |  2 +-
 7 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 66545eacd..7713bf914 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -67,7 +67,7 @@ repos:
     rev: v1.5.1
     hooks:
     -   id: mypy
-        additional_dependencies: [types-all]
+        additional_dependencies: [types-all, pandas-stubs, types-tqdm]
 -   repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0
     hooks:
diff --git a/pyproject.toml b/pyproject.toml
index d157d537f..e5dfb8a4f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,8 @@ test = [
   "pybtex",
   "pytest>=6.0.0",
   "pytest-cov>=4.0.0",
-  "types-tqdm"
+  "types-tqdm",
+  "typing_extensions"
 ]
 
 [project.urls]
@@ -110,7 +111,6 @@ check_untyped_defs = true
 disallow_incomplete_defs = true
 disallow_untyped_defs = true
 warn_redundant_casts = true
-warn_unused_ignores = true
 
 [[tool.mypy.overrides]]
 module = "scipy.*"
diff --git a/src/pymovements/plotting/main_sequence_plot.py b/src/pymovements/plotting/main_sequence_plot.py
index 551e73152..7edc7238b 100644
--- a/src/pymovements/plotting/main_sequence_plot.py
+++ b/src/pymovements/plotting/main_sequence_plot.py
@@ -110,10 +110,12 @@ def main_sequence_plot(
         alpha=alpha,
         s=marker_size,
         marker=marker,
-        **kwargs,
+        # to handle after https://github.com/pydata/xarray/pull/8030 is merged
+        **kwargs,  # type: ignore
     )
 
-    plt.title(title)
+    if title:
+        plt.title(title)
     plt.xlabel('Amplitude [dva]')
     plt.ylabel('Peak Velocity [dva/s]')
 
diff --git a/src/pymovements/plotting/traceplot.py b/src/pymovements/plotting/traceplot.py
index 9b6b62988..6839f840c 100644
--- a/src/pymovements/plotting/traceplot.py
+++ b/src/pymovements/plotting/traceplot.py
@@ -21,12 +21,17 @@
 from __future__ import annotations
 
 import sys
+from typing import Dict
+from typing import Literal
+from typing import Sequence
+from typing import Tuple
 
-import matplotlib
+import matplotlib.colors
 import matplotlib.pyplot as plt
+import matplotlib.scale
 import numpy as np
-from matplotlib import colors
 from matplotlib.collections import LineCollection
+from typing_extensions import TypeAlias
 
 from pymovements.gaze.gaze_dataframe import GazeDataFrame
 
@@ -37,44 +42,59 @@
 if 'pytest' in sys.modules:  # pragma: no cover
     matplotlib.use('Agg')
 
-DEFAULT_SEGMENTDATA = {
+LinearSegmentedColormapType: TypeAlias = Dict[
+    Literal['red', 'green', 'blue', 'alpha'], Sequence[Tuple[float, ...]],
+]
+
+DEFAULT_SEGMENTDATA: LinearSegmentedColormapType = {
     'red': [
-        [0.0, 0.0, 0.0],
-        [0.5, 1.0, 1.0],
-        [1.0, 1.0, 1.0],
+        (0.0, 0.0, 0.0),
+        (0.5, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
     ],
     'green': [
-        [0.0, 0.0, 0.0],
-        [0.5, 1.0, 1.0],
-        [1.0, 0.0, 0.0],
+        (0.0, 0.0, 0.0),
+        (0.5, 1.0, 1.0),
+        (1.0, 0.0, 0.0),
     ],
     'blue': [
-        [0.0, 0.0, 0.0],
-        [0.5, 0.0, 0.0],
-        [1.0, 0.0, 0.0],
+        (0.0, 0.0, 0.0),
+        (0.5, 0.0, 0.0),
+        (1.0, 0.0, 0.0),
+    ],
+    'alpha': [
+        (1.0, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
     ],
 }
 
 
-DEFAULT_SEGMENTDATA_TWOSLOPE = {
+DEFAULT_SEGMENTDATA_TWOSLOPE: LinearSegmentedColormapType = {
     'red': [
-        [0.0, 0.0, 0.0],
-        [0.5, 0.0, 0.0],
-        [0.75, 1.0, 1.0],
-        [1.0, 1.0, 1.0],
+        (0.0, 0.0, 0.0),
+        (0.5, 0.0, 0.0),
+        (0.75, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
     ],
     'green': [
-        [0.0, 0.0, 0.0],
-        [0.25, 1.0, 1.0],
-        [0.5, 0.0, 0.0],
-        [0.75, 1.0, 1.0],
-        [1.0, 0.0, 0.0],
+        (0.0, 0.0, 0.0),
+        (0.25, 1.0, 1.0),
+        (0.5, 0.0, 0.0),
+        (0.75, 1.0, 1.0),
+        (1.0, 0.0, 0.0),
     ],
     'blue': [
-        [0.0, 1.0, 1.0],
-        [0.25, 1.0, 1.0],
-        [0.5, 0.0, 0.0],
-        [1.0, 0.0, 0.0],
+        (0.0, 1.0, 1.0),
+        (0.25, 1.0, 1.0),
+        (0.5, 0.0, 0.0),
+        (1.0, 0.0, 0.0),
+    ],
+    'alpha': [
+        (1.0, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
+        (1.0, 1.0, 1.0),
     ],
 }
 
@@ -83,9 +103,9 @@ def traceplot(
         gaze: GazeDataFrame,
         position_column: str = 'pixel',
         cval: np.ndarray | None = None,  # pragma: no cover
-        cmap: colors.Colormap | None = None,
-        cmap_norm: colors.Normalize | str | None = None,
-        cmap_segmentdata: dict[str, list[list[float]]] | None = None,
+        cmap: matplotlib.colors.Colormap | None = None,
+        cmap_norm: matplotlib.colors.Normalize | str | None = None,
+        cmap_segmentdata: LinearSegmentedColormapType | None = None,
         cbar_label: str | None = None,
         show_cbar: bool = False,
         padding: float | None = None,
@@ -148,7 +168,7 @@ def traceplot(
         show_cbar = False
 
     cval_max = np.nanmax(np.abs(cval))
-    cval_min = np.nanmin(cval)
+    cval_min = np.nanmin(cval).astype(float)
 
     if cmap_norm is None:
         if cval_max and cval_min < 0:
@@ -183,7 +203,10 @@ def traceplot(
     elif isinstance(cmap_norm, str):
         # pylint: disable=protected-access
 
-        if (scale_class := matplotlib.scale._scale_mapping.get(cmap_norm, None)) is None:
+        # to handle after https://github.com/pydata/xarray/pull/8030 is merged
+        if (
+            scale_class := matplotlib.scale._scale_mapping.get(cmap_norm, None)  # type: ignore
+        ) is None:
             raise ValueError(f'cmap_norm string {cmap_norm} is not supported')
 
         norm_class = matplotlib.colors.make_norm_from_scale(scale_class)
@@ -218,7 +241,8 @@ def traceplot(
         # sm.set_array(cval)
         fig.colorbar(line, label=cbar_label, ax=ax)
 
-    ax.set_title(title)
+    if title:
+        ax.set_title(title)
 
     if savepath is not None:
         fig.savefig(savepath)
diff --git a/tests/plotting/main_sequence_plot_test.py b/tests/plotting/main_sequence_plot_test.py
index 2c752839c..f960ea3bc 100644
--- a/tests/plotting/main_sequence_plot_test.py
+++ b/tests/plotting/main_sequence_plot_test.py
@@ -169,6 +169,31 @@ def test_main_sequence_plot_not_show(input_df, show, monkeypatch):
     mock_function.assert_not_called()
 
 
+@pytest.mark.parametrize(
+    ('input_df', 'title'),
+    [
+        pytest.param(
+            EventDataFrame(
+                pl.DataFrame(
+                    {
+                        'amplitude': np.arange(100),
+                        'peak_velocity': np.linspace(10, 50, num=100),
+                        'name': ['saccade' for _ in range(100)],
+                    },
+                ),
+            ),
+            'foo',
+            id='do_not_show_plot',
+        ),
+    ],
+)
+def test_main_sequence_plot_set_title(input_df, title, monkeypatch):
+    mock_function = Mock()
+    monkeypatch.setattr(plt, 'title', mock_function)
+    main_sequence_plot(input_df, title=title)
+    plt.close()
+
+
 @pytest.mark.parametrize(
     ('input_df', 'expected_error', 'error_msg'),
     [
diff --git a/tests/plotting/traceplot_test.py b/tests/plotting/traceplot_test.py
index 0d1b2a3fd..8ded6fef2 100644
--- a/tests/plotting/traceplot_test.py
+++ b/tests/plotting/traceplot_test.py
@@ -109,6 +109,10 @@ def gaze_fixture():
             {'cval': np.arange(0, 200), 'show_cbar': False},
             id='show_cbar_false',
         ),
+        pytest.param(
+            {'cval': np.arange(0, 200), 'title': 'foo'},
+            id='set_title',
+        ),
     ],
 )
 def test_traceplot_show(gaze, kwargs, monkeypatch):
diff --git a/tox.ini b/tox.ini
index 50571bb92..5e5925bf9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -80,9 +80,9 @@ commands =
 changedir = {toxinidir}
 deps =
     mypy
-    .[test]
     pandas-stubs
     types-all
+    types-tqdm
 commands =
     mypy {toxinidir}/src
 

From 23d8f62014555b7708bfe335244604c02cc9b8e7 Mon Sep 17 00:00:00 2001
From: prassepaul <prasse.paul@googlemail.com>
Date: Fri, 22 Sep 2023 18:25:25 +0200
Subject: [PATCH 10/20] feat: Add gaze.from_ipc() (#568)

---
 docs/source/bibliography.bib                  |   2 +-
 src/pymovements/gaze/__init__.py              |   2 +
 src/pymovements/gaze/io.py                    |  83 +++++++++++++++---
 tests/gaze/io/files/binocular_example.csv     |  18 ++--
 tests/gaze/io/files/binocular_example.feather | Bin 0 -> 2239 bytes
 tests/gaze/io/files/monocular_example.csv     |  18 ++--
 tests/gaze/io/files/monocular_example.feather | Bin 0 -> 1295 bytes
 tests/gaze/io/ipc_test.py                     |  48 ++++++++++
 8 files changed, 138 insertions(+), 33 deletions(-)
 create mode 100644 tests/gaze/io/files/binocular_example.feather
 create mode 100644 tests/gaze/io/files/monocular_example.feather
 create mode 100644 tests/gaze/io/ipc_test.py

diff --git a/docs/source/bibliography.bib b/docs/source/bibliography.bib
index 3a7ab8bbd..f240e2163 100644
--- a/docs/source/bibliography.bib
+++ b/docs/source/bibliography.bib
@@ -74,5 +74,5 @@ @article{GazeOnFaces
   number={14},
   pages={16--16},
   year={2016},
-  publisher={The Association for Research in Vision and Ophthalmology}
+  publisher={The Association for Research in Vision and Ophthalmology},
 }
diff --git a/src/pymovements/gaze/__init__.py b/src/pymovements/gaze/__init__.py
index 0e2ceed5a..dc7d3cf18 100644
--- a/src/pymovements/gaze/__init__.py
+++ b/src/pymovements/gaze/__init__.py
@@ -63,6 +63,7 @@
 from pymovements.gaze.integration import from_numpy
 from pymovements.gaze.integration import from_pandas
 from pymovements.gaze.io import from_csv
+from pymovements.gaze.io import from_ipc
 from pymovements.gaze.screen import Screen
 
 
@@ -75,4 +76,5 @@
     'transforms_numpy',
     'transforms',
     'from_csv',
+    'from_ipc',
 ]
diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py
index 81bd391f7..9135864a1 100644
--- a/src/pymovements/gaze/io.py
+++ b/src/pymovements/gaze/io.py
@@ -106,14 +106,14 @@ def from_csv(
     │ i64  ┆ i64        ┆ i64        │
     ╞══════╪════════════╪════════════╡
     │ 0    ┆ 0          ┆ 0          │
-    │ 0    ┆ 0          ┆ 0          │
-    │ 0    ┆ 0          ┆ 0          │
-    │ 0    ┆ 0          ┆ 0          │
+    │ 1    ┆ 0          ┆ 0          │
+    │ 2    ┆ 0          ┆ 0          │
+    │ 3    ┆ 0          ┆ 0          │
     │ …    ┆ …          ┆ …          │
-    │ 0    ┆ 0          ┆ 0          │
-    │ 0    ┆ 0          ┆ 0          │
-    │ 0    ┆ 0          ┆ 0          │
-    │ 0    ┆ 0          ┆ 0          │
+    │ 6    ┆ 0          ┆ 0          │
+    │ 7    ┆ 0          ┆ 0          │
+    │ 8    ┆ 0          ┆ 0          │
+    │ 9    ┆ 0          ┆ 0          │
     └──────┴────────────┴────────────┘
 
     We can now load the data into a ``GazeDataFrame`` by specyfing the experimental setting
@@ -132,14 +132,14 @@ def from_csv(
     │ i64  ┆ list[i64] │
     ╞══════╪═══════════╡
     │ 0    ┆ [0, 0]    │
-    │ 0    ┆ [0, 0]    │
-    │ 0    ┆ [0, 0]    │
-    │ 0    ┆ [0, 0]    │
+    │ 1    ┆ [0, 0]    │
+    │ 2    ┆ [0, 0]    │
+    │ 3    ┆ [0, 0]    │
     │ …    ┆ …         │
-    │ 0    ┆ [0, 0]    │
-    │ 0    ┆ [0, 0]    │
-    │ 0    ┆ [0, 0]    │
-    │ 0    ┆ [0, 0]    │
+    │ 6    ┆ [0, 0]    │
+    │ 7    ┆ [0, 0]    │
+    │ 8    ┆ [0, 0]    │
+    │ 9    ┆ [0, 0]    │
     └──────┴───────────┘
 
     """
@@ -158,3 +158,58 @@ def from_csv(
         acceleration_columns=acceleration_columns,
     )
     return gaze_df
+
+
+def from_ipc(
+        file: str | Path,
+        experiment: Experiment | None = None,
+        **read_ipc_kwargs: Any,
+) -> GazeDataFrame:
+    """Initialize a :py:class:`pymovements.gaze.gaze_dataframe.GazeDataFrame` from an IPC file.
+
+    Parameters
+    ----------
+    file : str | Path
+        Path of IPC/feather file.
+    experiment : Experiment
+        The experiment definition.
+    **read_ipc_kwargs : Any
+        Additional keyword arguments to be passed to polars to read in the ipc file.
+
+    Examples
+    --------
+    Let's assume we have an IPC file stored at `tests/gaze/io/files/monocular_example.feather`.
+    We can then load the data into a ``GazeDataFrame``:
+
+    >>> from pymovements.gaze.io import from_ipc
+    >>> gaze = from_ipc(
+    ...     file='tests/gaze/io/files/monocular_example.feather',
+    ...     )
+    >>> gaze.frame
+    shape: (10, 2)
+    ┌──────┬───────────┐
+    │ time ┆ pixel     │
+    │ ---  ┆ ---       │
+    │ i64  ┆ list[i64] │
+    ╞══════╪═══════════╡
+    │ 0    ┆ [0, 0]    │
+    │ 1    ┆ [0, 0]    │
+    │ 2    ┆ [0, 0]    │
+    │ 3    ┆ [0, 0]    │
+    │ …    ┆ …         │
+    │ 6    ┆ [0, 0]    │
+    │ 7    ┆ [0, 0]    │
+    │ 8    ┆ [0, 0]    │
+    │ 9    ┆ [0, 0]    │
+    └──────┴───────────┘
+
+    """
+    # read data
+    gaze_data = pl.read_ipc(file, **read_ipc_kwargs)
+
+    # create gaze data frame
+    gaze_df = GazeDataFrame(
+        gaze_data,
+        experiment=experiment,
+    )
+    return gaze_df
diff --git a/tests/gaze/io/files/binocular_example.csv b/tests/gaze/io/files/binocular_example.csv
index ddfd55ab4..a1f3c95f6 100644
--- a/tests/gaze/io/files/binocular_example.csv
+++ b/tests/gaze/io/files/binocular_example.csv
@@ -1,11 +1,11 @@
 time,x_left_pix,y_left_pix,x_right_pix,y_right_pix,x_left_pos,y_left_pos,x_right_pos,y_right_pos
 0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
-0,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+1,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+2,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+3,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+4,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+5,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+6,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+7,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+8,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
+9,0,0,0,0,-23.104783, -13.489493,-23.104783, -13.489493
diff --git a/tests/gaze/io/files/binocular_example.feather b/tests/gaze/io/files/binocular_example.feather
new file mode 100644
index 0000000000000000000000000000000000000000..e52e31be25c842e5c9a709640379f21d53dfa3c2
GIT binary patch
literal 2239
zcmeHJy-EW?5S}DwyqpJ~Q6Y#$3JVbgt+Wv?ot+_#K7c6>j0u<`r4L|XV=KOhaPN?J
zun;S;kRZhG+uf1lh~f`wlR5TgXXe|Py_vh&YqVMihub1!&}Biy6Ol*Em%u%NLIRl+
zA9qilp%0<)anV}By#~mIkwhL~?_qvSJw{<!B$6tSkt~=sq4CHV#BPDtF$@;Y@>rN;
z1YA+dxIA1UNx$8p{snq!xRYM@Ea@lRQ|jq=0Dgk;nLX?RS66It@*bYxGX{p)@TFp+
zPTI>1?7d{rW^C@`C%*ehrwuozF8c_qIWrUJZGB1-%A(0z2*Vv>23!P6F^I7gVD14M
zp^ooU)0xh55S5Qq&qQR&$+2?eF-x(Nb$yz%tE}s}mbuLDH7~DcgG|8Z?OC`!f^LQH
zGKqO6F+Qnmnan!Q_l;Ds?Av#ZqyG3^ks{09YFBges^fLX*Bt-o869NGejx+@Rlmk&
zd49OEliu%L4X;NV=~Vt&KF}h^d>!<=W1o&8vsc@mD(I6{e1|++?0I$LnnhEah87=d
SNYiuikI%(h6ry2%TwVb)SUQOS

literal 0
HcmV?d00001

diff --git a/tests/gaze/io/files/monocular_example.csv b/tests/gaze/io/files/monocular_example.csv
index 5493f10e4..461d3e5f9 100644
--- a/tests/gaze/io/files/monocular_example.csv
+++ b/tests/gaze/io/files/monocular_example.csv
@@ -1,11 +1,11 @@
 time,x_left_pix,y_left_pix
 0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
-0,0,0
+1,0,0
+2,0,0
+3,0,0
+4,0,0
+5,0,0
+6,0,0
+7,0,0
+8,0,0
+9,0,0
diff --git a/tests/gaze/io/files/monocular_example.feather b/tests/gaze/io/files/monocular_example.feather
new file mode 100644
index 0000000000000000000000000000000000000000..cdb352ff82b8cf8a51e2ff5652e445dbc9dc9558
GIT binary patch
literal 1295
zcmd^9J5B>J5PcyAoMnYJ3z1TC1qvj}<OHBJaDgpWA`(T4lsN)N$Wb^AqM$&Cf&wYL
z=kZXifC4)5BJcd3#^dog)7k9e>J-=zu?GYIFVv4I1L#1314MoU-smssiTo+L!|z?v
zfG5V5)Ni=PT)h!cp(d-sF!9TjP5H$=r##_GTqf#r8Nw@h6G~MPF<`F91+<H1PJwsE
z13kOcZ`);~z5kkf(asy?X6nr&rPq6}A_(hCA0>`b%_SKR<Q5jjDs#*%$PeUJ*vs*+
z#ASA_IzhLu-Jff4Gc|_B`qKgQse3K%?%F!TcknHAw}SEs>Vu$O3x1w`Gt&)A9c25r
z!uR$4d>>7m;Ahbs3Wp}xVMCqePiM@Oe<d`pI*|8C?_fe(@62WvoWq)Z(krsJIL?>-
UOpf3)ZWgxZy`QlW)AUF90T5$zi~s-t

literal 0
HcmV?d00001

diff --git a/tests/gaze/io/ipc_test.py b/tests/gaze/io/ipc_test.py
new file mode 100644
index 000000000..7a91817fa
--- /dev/null
+++ b/tests/gaze/io/ipc_test.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2023 The pymovements Project Authors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""Test read from IPC/feather."""
+import pytest
+
+import pymovements as pm
+
+
+@pytest.mark.parametrize(
+    ('kwargs', 'shape'),
+    [
+        pytest.param(
+            {
+                'file': 'tests/gaze/io/files/monocular_example.feather',
+            },
+            (10, 2),
+            id='feather_mono_shape',
+        ),
+        pytest.param(
+            {
+                'file': 'tests/gaze/io/files/binocular_example.feather',
+            },
+            (10, 3),
+            id='feather_bino_shape',
+        ),
+    ],
+)
+def test_shapes(kwargs, shape):
+    gaze_dataframe = pm.gaze.from_ipc(**kwargs)
+
+    assert gaze_dataframe.frame.shape == shape

From 8fcb7d335b1aa4dcc0f2862bf9dd992c771ee399 Mon Sep 17 00:00:00 2001
From: "Daniel G. Krakowczyk" <daniel.krakowczyk@uni-potsdam.de>
Date: Fri, 22 Sep 2023 18:50:03 +0200
Subject: [PATCH 11/20] tests: Test resulting filestructure for
 utils.archives.extract() (#570)

---
 tests/utils/archives_test.py | 257 ++++++++++++++++++++++++++++-------
 1 file changed, 205 insertions(+), 52 deletions(-)

diff --git a/tests/utils/archives_test.py b/tests/utils/archives_test.py
index 96de4477c..f0cfb70c1 100644
--- a/tests/utils/archives_test.py
+++ b/tests/utils/archives_test.py
@@ -21,6 +21,7 @@
 import bz2
 import gzip
 import lzma
+import os
 import pathlib
 import tarfile
 import zipfile
@@ -55,12 +56,9 @@ def test_detect_file_type_no_suffixes():
     params=[
         (None, 'tar'),
         (None, 'zip'),
-        ('bz2', None),
-        ('gz', None),
         ('tbz', None),
         ('tbz2', None),
         ('tgz', None),
-        ('xz', None),
         ('bz2', 'tar'),
         ('bz2', 'zip'),
         ('gz', 'tar'),
@@ -70,12 +68,9 @@ def test_detect_file_type_no_suffixes():
     ids=[
         'tar_archive',
         'zip_archive',
-        'bz2_compressed_archive',
-        'gz_compressed_archive',
         'tbz_compressed_archive',
         'tbz2_compressed_archive',
         'tgz_compressed_archive',
-        'xz_compressed_archive',
         'bz2_compressed_tar_archive',
         'bz2_compressed_zip_archive',
         'gz_compressed_tar_archive',
@@ -89,18 +84,14 @@ def fixture_archive(request, tmp_path):
 
     # write tmp filepath
     test_filepath = rootpath / 'test.file'
-    if extension in {'zip', 'tar'}:
-        test_filepath.write_text('test')
-    if compression in {'bz2', 'xz', 'tbz', 'tbz2', 'tgz', 'gz'} and extension is None:
-        test_filepath.write_bytes(b'test')
+    test_filepath.write_text('test')
+
+    top_level_directory = 'toplevel'
 
     # add additional archive
     filepath = rootpath / 'recursive.zip'
     with zipfile.ZipFile(filepath, 'w') as zip_open:
-        zip_open.write(test_filepath)
-
-    # now remove original file again
-    test_filepath.unlink()
+        zip_open.write(test_filepath, arcname=test_filepath.name)
 
     # declare archive path
     if compression is None:
@@ -112,19 +103,16 @@ def fixture_archive(request, tmp_path):
 
     if compression is None and extension == 'zip':
         with zipfile.ZipFile(archive_path, 'w') as zip_open:
-            zip_open.write(filepath)
-        yield archive_path
+            zip_open.write(filepath, arcname=os.path.join(top_level_directory, filepath.name))
 
     elif compression is not None and extension == 'zip':
         comp_type = _ZIP_COMPRESSION_MAP[f'.{compression}']
         with zipfile.ZipFile(archive_path, 'w', compression=comp_type) as zip_open:
-            zip_open.write(filepath)
-        yield archive_path
+            zip_open.write(filepath, arcname=os.path.join(top_level_directory, filepath.name))
 
     elif compression is None and extension == 'tar':
         with tarfile.TarFile.open(archive_path, 'w') as fp:
-            fp.add(filepath)
-        yield archive_path
+            fp.add(filepath, arcname=os.path.join(top_level_directory, filepath.name))
 
     elif (
         (compression is not None and extension == 'tar') or
@@ -135,26 +123,61 @@ def fixture_archive(request, tmp_path):
         if compression in {'tgz'}:
             compression = 'gz'
         with tarfile.TarFile.open(archive_path, f'w:{compression}') as fp:
-            fp.add(filepath)
-        yield archive_path
+            fp.add(filepath, arcname=os.path.join(top_level_directory, filepath.name))
+
+    else:
+        raise ValueError(f'{request.param} not supported for archive fixture')
+
+    # now remove original files again
+    test_filepath.unlink()
+    filepath.unlink()
+
+    yield archive_path
+
+
+@pytest.fixture(
+    name='compressed_file',
+    params=[
+        'bz2',
+        'gz',
+        'xz',
+    ],
+    ids=[
+        'bz2_compressed_file',
+        'gz_compressed_file',
+        'xz_compressed_file',
+    ],
+)
+def fixture_compressed_file(request, tmp_path):
+    rootpath = tmp_path
+    compression = request.param
 
-    elif compression == 'bz2' and extension is None:
-        with bz2.open(archive_path, 'wb') as fp:
-            fp.write(filepath.read_bytes())
-        yield archive_path
+    # write tmp filepath
+    test_filepath = rootpath / 'test.file'
+    test_filepath.write_bytes(b'test')
 
-    elif compression == 'gz' and extension is None:
-        with gzip.open(archive_path, 'wb') as fp:
-            fp.write(filepath.read_bytes())
-        yield archive_path
+    # declare archive path
+    compressed_filepath = rootpath / f'test.{compression}'
 
-    elif compression == 'xz' and extension is None:
-        with lzma.open(archive_path, 'wb') as fp:
-            fp.write(filepath.read_bytes())
-        yield archive_path
+    if compression == 'bz2':
+        with bz2.open(compressed_filepath, 'wb') as fp:
+            fp.write(test_filepath.read_bytes())
+
+    elif compression == 'gz':
+        with gzip.open(compressed_filepath, 'wb') as fp:
+            fp.write(test_filepath.read_bytes())
+
+    elif compression == 'xz':
+        with lzma.open(compressed_filepath, 'wb') as fp:
+            fp.write(test_filepath.read_bytes())
 
     else:
-        raise ValueError(f'{request.param} not supported for archive fixture')
+        raise ValueError(f'{request.param} not supported for compressed file fixture')
+
+    # now remove original file again
+    test_filepath.unlink()
+
+    yield compressed_filepath
 
 
 @pytest.fixture(
@@ -181,26 +204,90 @@ def fixture_unsupported_archive(request, tmp_path):
 
 
 @pytest.mark.parametrize(
-    'recursive',
+    ('recursive', 'remove_finished', 'expected_files'),
     [
-        pytest.param(False, id='recursive_false'),
-        pytest.param(True, id='recursive_true'),
+        pytest.param(
+            False, False,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+            ),
+            id='recursive_false_remove_finished_false',
+        ),
+        pytest.param(
+            False, True,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+            ),
+            id='recursive_false_remove_finished_true',
+        ),
+        pytest.param(
+            True, False,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+                os.path.join('toplevel', 'recursive'),
+                os.path.join('toplevel', 'recursive', 'test.file'),
+            ),
+            id='recursive_true_remove_finished_false',
+        ),
+        pytest.param(
+            True, True,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive'),
+                os.path.join('toplevel', 'recursive', 'test.file'),
+            ),
+            id='recursive_true_remove_finished_true',
+        ),
     ],
 )
+def test_extract_archive_destination_path_None(
+        recursive, remove_finished, expected_files, archive, tmp_path,
+):
+    extract_archive(
+        source_path=archive,
+        destination_path=None,
+        recursive=recursive,
+        remove_finished=remove_finished,
+    )
+    result_files = {
+        str(file.relative_to(archive.parent)) for file in archive.parent.rglob('*')
+    }
+
+    expected_files = set(expected_files)
+    if not remove_finished:
+        expected_files.add(archive.name)
+    assert result_files == expected_files
+
+
 @pytest.mark.parametrize(
-    'remove_finished',
+    ('recursive', 'remove_finished'),
     [
-        pytest.param(False, id='remove_finished_false'),
-        pytest.param(True, id='remove_finished_true'),
+        pytest.param(False, False, id='recursive_false_remove_finished_false'),
+        pytest.param(False, True, id='recursive_false_remove_finished_true'),
+        pytest.param(True, False, id='recursive_true_remove_finished_false'),
+        pytest.param(True, True, id='recursive_true_remove_finished_true'),
     ],
 )
-def test_extract_archive_destination_path_None(recursive, remove_finished, archive):
+def test_extract_compressed_file_destination_path_None(
+        recursive, remove_finished, compressed_file, tmp_path,
+):
     extract_archive(
-        source_path=archive,
+        source_path=compressed_file,
         destination_path=None,
         recursive=recursive,
         remove_finished=remove_finished,
     )
+    result_files = {
+        str(file.relative_to(compressed_file.parent)) for file in compressed_file.parent.rglob('*')
+    }
+
+    expected_files = {'test'}
+    if not remove_finished:
+        expected_files.add(compressed_file.name)
+    assert result_files == expected_files
 
 
 @pytest.mark.parametrize(
@@ -235,27 +322,93 @@ def test_extract_unsupported_archive_destination_path_None(
 
 
 @pytest.mark.parametrize(
-    'recursive',
+    ('recursive', 'remove_finished', 'expected_files'),
     [
-        pytest.param(False, id='recursive_false'),
-        pytest.param(True, id='recursive_true'),
+        pytest.param(
+            False, False,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+            ),
+            id='recursive_false_remove_finished_false',
+        ),
+        pytest.param(
+            False, True,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+            ),
+            id='recursive_false_remove_finished_true',
+        ),
+        pytest.param(
+            True, False,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+                os.path.join('toplevel', 'recursive'),
+                os.path.join('toplevel', 'recursive', 'test.file'),
+            ),
+            id='recursive_true_remove_finished_false',
+        ),
+        pytest.param(
+            True, True,
+            (
+                'toplevel',
+                os.path.join('toplevel', 'recursive'),
+                os.path.join('toplevel', 'recursive', 'test.file'),
+            ),
+            id='recursive_true_remove_finished_true',
+        ),
     ],
 )
+def test_extract_archive_destination_path_not_None(
+        recursive, remove_finished, archive, tmp_path, expected_files,
+):
+    destination_path = tmp_path / pathlib.Path('tmpfoo')
+    extract_archive(
+        source_path=archive,
+        destination_path=destination_path,
+        recursive=recursive,
+        remove_finished=remove_finished,
+    )
+
+    if destination_path.is_file():
+        destination_path = destination_path.parent
+
+    result_files = {str(file.relative_to(destination_path)) for file in destination_path.rglob('*')}
+
+    assert result_files == set(expected_files)
+    assert archive.is_file() != remove_finished
+
+
 @pytest.mark.parametrize(
-    'remove_finished',
+    ('recursive', 'remove_finished'),
     [
-        pytest.param(False, id='remove_finished_false'),
-        pytest.param(True, id='remove_finished_true'),
+        pytest.param(False, False, id='recursive_false_remove_finished_false'),
+        pytest.param(False, True, id='recursive_false_remove_finished_true'),
+        pytest.param(True, False, id='recursive_true_remove_finished_false'),
+        pytest.param(True, True, id='recursive_true_remove_finished_true'),
     ],
 )
-def test_extract_archive_destination_path_not_None(recursive, remove_finished, archive, tmp_path):
-    destination_path = tmp_path / pathlib.Path('tmpfoo')
+def test_extract_compressed_file_destination_path_not_None(
+        recursive, remove_finished, compressed_file, tmp_path,
+):
+    destination_filename = 'tmpfoo'
+    destination_path = tmp_path / pathlib.Path(destination_filename)
     extract_archive(
-        source_path=archive,
+        source_path=compressed_file,
         destination_path=destination_path,
         recursive=recursive,
         remove_finished=remove_finished,
     )
+    result_files = {
+        str(file.relative_to(compressed_file.parent)) for file in compressed_file.parent.rglob('*')
+    }
+
+    expected_files = {destination_filename}
+    if not remove_finished:
+        expected_files.add(compressed_file.name)
+    assert result_files == expected_files
 
 
 @pytest.mark.parametrize(

From 339cd1558433dfafe9431ce63bdbffec84edf820 Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Mon, 25 Sep 2023 15:49:45 +0200
Subject: [PATCH 12/20] Added functionality and tests to remove top-level
 directories

---
 src/pymovements/utils/archives.py | 22 ++++++---
 tests/utils/archives_test.py      | 77 +++++++++++++++++++------------
 2 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index 93994fa24..2e3b412b5 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -92,6 +92,20 @@ def extract_archive(
     if remove_finished:
         source_path.unlink()
 
+    if remove_top_level:
+        children = [f.path for f in os.scandir(destination_path)]
+        # Check if top-level directory has a single child
+        if len(children) == 1:
+            single_child = children[0]
+            shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
+            shutil.rmtree(single_child)
+        # Check if top-level directory has just the two children archive and extracted archive
+        elif len(children) == 2 and destination_path == source_path.parent:
+            # Name of extracted archive is shorter because it has no extension
+            single_child = children[0] if (len(children[0]) < len(children[1])) else children[1]
+            shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
+            shutil.rmtree(single_child)
+
     if recursive:
         # Get filepaths of all archives in extracted directory.
         archive_extensions = [
@@ -111,16 +125,10 @@ def extract_archive(
                 destination_path=extract_destination,
                 recursive=recursive,
                 remove_finished=remove_finished,
+                remove_top_level=False,
                 verbose=0 if verbose < 2 else 2,
             )
 
-    if remove_top_level:
-        # Check if top-level directory has a single child
-        if len([f.path for f in os.scandir(destination_path)]) == 1:
-            single_child = [f.path for f in os.scandir(destination_path)][0]
-            shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
-            shutil.rmtree(single_child)
-
     return destination_path
 
 
diff --git a/tests/utils/archives_test.py b/tests/utils/archives_test.py
index f0cfb70c1..05134a8d5 100644
--- a/tests/utils/archives_test.py
+++ b/tests/utils/archives_test.py
@@ -204,10 +204,10 @@ def fixture_unsupported_archive(request, tmp_path):
 
 
 @pytest.mark.parametrize(
-    ('recursive', 'remove_finished', 'expected_files'),
+    ('recursive', 'remove_finished', 'remove_top_level', 'expected_files'),
     [
         pytest.param(
-            False, False,
+            False, False, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
@@ -215,7 +215,7 @@ def fixture_unsupported_archive(request, tmp_path):
             id='recursive_false_remove_finished_false',
         ),
         pytest.param(
-            False, True,
+            False, True, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
@@ -223,7 +223,7 @@ def fixture_unsupported_archive(request, tmp_path):
             id='recursive_false_remove_finished_true',
         ),
         pytest.param(
-            True, False,
+            True, False, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
@@ -233,7 +233,7 @@ def fixture_unsupported_archive(request, tmp_path):
             id='recursive_true_remove_finished_false',
         ),
         pytest.param(
-            True, True,
+            True, True, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive'),
@@ -241,16 +241,33 @@ def fixture_unsupported_archive(request, tmp_path):
             ),
             id='recursive_true_remove_finished_true',
         ),
+        pytest.param(
+            False, False, True,
+            (
+                'recursive.zip',
+            ),
+            id='recursive_false_remove_top_level_true',
+        ),
+        pytest.param(
+            True, False, True,
+            (
+                'recursive',
+                'recursive.zip',
+                os.path.join('recursive', 'test.file'),
+            ),
+            id='recursive_true_remove_top_level_true',
+        ),
     ],
 )
 def test_extract_archive_destination_path_None(
-        recursive, remove_finished, expected_files, archive, tmp_path,
+        recursive, remove_finished, remove_top_level, expected_files, archive,
 ):
     extract_archive(
         source_path=archive,
         destination_path=None,
         recursive=recursive,
         remove_finished=remove_finished,
+        remove_top_level=remove_top_level,
     )
     result_files = {
         str(file.relative_to(archive.parent)) for file in archive.parent.rglob('*')
@@ -272,7 +289,7 @@ def test_extract_archive_destination_path_None(
     ],
 )
 def test_extract_compressed_file_destination_path_None(
-        recursive, remove_finished, compressed_file, tmp_path,
+        recursive, remove_finished, compressed_file,
 ):
     extract_archive(
         source_path=compressed_file,
@@ -322,10 +339,10 @@ def test_extract_unsupported_archive_destination_path_None(
 
 
 @pytest.mark.parametrize(
-    ('recursive', 'remove_finished', 'expected_files'),
+    ('recursive', 'remove_finished', 'remove_top_level', 'expected_files'),
     [
         pytest.param(
-            False, False,
+            False, False, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
@@ -333,7 +350,7 @@ def test_extract_unsupported_archive_destination_path_None(
             id='recursive_false_remove_finished_false',
         ),
         pytest.param(
-            False, True,
+            False, True, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
@@ -341,7 +358,7 @@ def test_extract_unsupported_archive_destination_path_None(
             id='recursive_false_remove_finished_true',
         ),
         pytest.param(
-            True, False,
+            True, False, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
@@ -351,7 +368,7 @@ def test_extract_unsupported_archive_destination_path_None(
             id='recursive_true_remove_finished_false',
         ),
         pytest.param(
-            True, True,
+            True, True, False,
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive'),
@@ -359,10 +376,26 @@ def test_extract_unsupported_archive_destination_path_None(
             ),
             id='recursive_true_remove_finished_true',
         ),
+        pytest.param(
+            False, False, True,
+            (
+                'recursive.zip',
+            ),
+            id='recursive_false_remove_top_level_true',
+        ),
+        pytest.param(
+            True, False, True,
+            (
+                'recursive',
+                'recursive.zip',
+                os.path.join('recursive', 'test.file'),
+            ),
+            id='recursive_true_remove_top_level_true',
+        ),
     ],
 )
 def test_extract_archive_destination_path_not_None(
-        recursive, remove_finished, archive, tmp_path, expected_files,
+        recursive, remove_finished, remove_top_level, archive, tmp_path, expected_files,
 ):
     destination_path = tmp_path / pathlib.Path('tmpfoo')
     extract_archive(
@@ -370,6 +403,7 @@ def test_extract_archive_destination_path_not_None(
         destination_path=destination_path,
         recursive=recursive,
         remove_finished=remove_finished,
+        remove_top_level=remove_top_level,
     )
 
     if destination_path.is_file():
@@ -425,7 +459,7 @@ def test_extract_compressed_file_destination_path_not_None(
         pytest.param(True, id='remove_finished_true'),
     ],
 )
-def test_extract_unnsupported_archive_destination_path_not_None(
+def test_extract_unsupported_archive_destination_path_not_None(
         recursive,
         remove_finished,
         unsupported_archive,
@@ -449,18 +483,3 @@ def test_decompress_unknown_compression_suffix():
         _decompress(pathlib.Path('test.zip.zip'))
     msg, = excinfo.value.args
     assert msg == "Couldn't detect a compression from suffix .zip."
-
-
-@pytest.mark.parametrize(
-    'remove_top_level',
-    [
-        pytest.param(False, id='remove_top_level_false'),
-        pytest.param(True, id='remove_top_level_true'),
-    ],
-)
-def test_remove_top_level(remove_top_level, archive):
-    extract_archive(
-        source_path=archive,
-        destination_path=None,
-        remove_top_level=remove_top_level,
-    )

From b95ed7fdd3c62fff7ae39eb941b1523f5270e084 Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Tue, 26 Sep 2023 09:23:59 +0200
Subject: [PATCH 13/20] Added functionality for recursive function application

---
 src/pymovements/utils/archives.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index 2e3b412b5..99a1b9232 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -97,14 +97,16 @@ def extract_archive(
         # Check if top-level directory has a single child
         if len(children) == 1:
             single_child = children[0]
-            shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
-            shutil.rmtree(single_child)
+            if os.path.isdir(single_child):
+                shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
+                shutil.rmtree(single_child)
         # Check if top-level directory has just the two children archive and extracted archive
         elif len(children) == 2 and destination_path == source_path.parent:
             # Name of extracted archive is shorter because it has no extension
             single_child = children[0] if (len(children[0]) < len(children[1])) else children[1]
-            shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
-            shutil.rmtree(single_child)
+            if os.path.isdir(single_child):
+                shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
+                shutil.rmtree(single_child)
 
     if recursive:
         # Get filepaths of all archives in extracted directory.
@@ -125,7 +127,7 @@ def extract_archive(
                 destination_path=extract_destination,
                 recursive=recursive,
                 remove_finished=remove_finished,
-                remove_top_level=False,
+                remove_top_level=remove_top_level,
                 verbose=0 if verbose < 2 else 2,
             )
 

From 22bf1f87068e76192379802ab6adfc7c95475869 Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Wed, 27 Sep 2023 11:08:23 +0200
Subject: [PATCH 14/20] Added functionality to remove top-level directory to
 downloads

---
 src/pymovements/utils/downloads.py | 4 ++++
 tests/utils/downloads_test.py      | 9 ++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/pymovements/utils/downloads.py b/src/pymovements/utils/downloads.py
index fc053fd0c..3a245496d 100644
--- a/src/pymovements/utils/downloads.py
+++ b/src/pymovements/utils/downloads.py
@@ -40,6 +40,7 @@ def download_and_extract_archive(
         md5: str | None = None,
         recursive: bool = True,
         remove_finished: bool = False,
+        remove_top_level: bool = True,
         verbose: int = 1,
 ) -> None:
     """Download and extract archive file.
@@ -60,6 +61,8 @@ def download_and_extract_archive(
         Recursively extract archives which are included in extracted archive.
     remove_finished : bool
         Remove downloaded file after successful extraction or decompression, default: False.
+    remove_top_level: bool
+        If ``True``, remove the top-level directory if it has only one child, default:True.
     verbose : int
         Verbosity levels: (1) Show download progress bar and print info messages on downloading
         and extracting archive files without printing messages for recursive archive extraction.
@@ -87,6 +90,7 @@ def download_and_extract_archive(
         destination_path=extract_dirpath,
         recursive=recursive,
         remove_finished=remove_finished,
+        remove_top_level=remove_top_level,
         verbose=verbose,
     )
 
diff --git a/tests/utils/downloads_test.py b/tests/utils/downloads_test.py
index ae6ff3bc1..6d2b3a82c 100644
--- a/tests/utils/downloads_test.py
+++ b/tests/utils/downloads_test.py
@@ -156,7 +156,14 @@ def test_download_and_extract_archive(tmp_path):
     md5 = '52bbf03a7c50ee7152ccb9d357c2bb30'
     extract_dirpath = tmp_path / 'extracted'
 
-    download_and_extract_archive(url, tmp_path, download_filename, extract_dirpath, md5)
+    download_and_extract_archive(
+        url,
+        tmp_path,
+        download_filename,
+        extract_dirpath,
+        md5,
+        remove_top_level=False,
+    )
 
     assert extract_dirpath.exists()
     assert (extract_dirpath / 'pymovements-0.4.0').exists()

From 2c643dde12af99cb50f3240e8652c94674842357 Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Wed, 27 Sep 2023 11:28:37 +0200
Subject: [PATCH 15/20] Changed test_dataset_extract_remove_finished_false to
 include new param

---
 tests/dataset/dataset_download_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/dataset/dataset_download_test.py b/tests/dataset/dataset_download_test.py
index ca26acbe9..e3ca771fb 100644
--- a/tests/dataset/dataset_download_test.py
+++ b/tests/dataset/dataset_download_test.py
@@ -214,7 +214,7 @@ def test_dataset_extract_remove_finished_true(
 
     paths = pm.DatasetPaths(root=tmp_path, dataset='.')
     dataset = pm.Dataset(dataset_definition, path=paths)
-    dataset.extract(remove_finished=True, verbose=1)
+    dataset.extract(remove_finished=True, remove_top_level=False, verbose=1)
 
     mock_extract_archive.assert_has_calls([
         mock.call(
@@ -222,6 +222,7 @@ def test_dataset_extract_remove_finished_true(
             destination_path=tmp_path / 'raw',
             recursive=True,
             remove_finished=True,
+            remove_top_level=False,
             verbose=1,
         ),
     ])

From 8d28f93dba06cdd79acfaee50c2ee258ba1df2f7 Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Wed, 27 Sep 2023 11:35:05 +0200
Subject: [PATCH 16/20] Added new param to existing tests

---
 tests/dataset/dataset_download_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dataset/dataset_download_test.py b/tests/dataset/dataset_download_test.py
index e3ca771fb..478be9788 100644
--- a/tests/dataset/dataset_download_test.py
+++ b/tests/dataset/dataset_download_test.py
@@ -246,6 +246,7 @@ def test_dataset_extract_remove_finished_false(
             destination_path=tmp_path / 'raw',
             recursive=True,
             remove_finished=False,
+            remove_top_level=True,
             verbose=1,
         ),
     ])

From 6452948d39722ef08f2cb2e4bf2de53d7858f05a Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Wed, 27 Sep 2023 13:03:05 +0200
Subject: [PATCH 17/20] Changed line to make it work on all OSes

---
 src/pymovements/utils/archives.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index 99a1b9232..99c2e9d54 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -93,7 +93,7 @@ def extract_archive(
         source_path.unlink()
 
     if remove_top_level:
-        children = [f.path for f in os.scandir(destination_path)]
+        children = [str(file) for file in destination_path.glob('*')]
         # Check if top-level directory has a single child
         if len(children) == 1:
             single_child = children[0]

From 9645f501897a967aa0ed889b624b8c3cbc2711bd Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Wed, 27 Sep 2023 14:45:45 +0200
Subject: [PATCH 18/20] Changes to how the single child is detected when there
 is no destination path

---
 src/pymovements/utils/archives.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index 99c2e9d54..7f771fea2 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -102,8 +102,8 @@ def extract_archive(
                 shutil.rmtree(single_child)
         # Check if top-level directory has just the two children archive and extracted archive
         elif len(children) == 2 and destination_path == source_path.parent:
-            # Name of extracted archive is shorter because it has no extension
-            single_child = children[0] if (len(children[0]) < len(children[1])) else children[1]
+            # Name of extracted archive has no suffix
+            single_child = children[1] if (Path(children[0]).suffixes != []) else children[0]
             if os.path.isdir(single_child):
                 shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
                 shutil.rmtree(single_child)

From 1503ab13cc84868df33455e0b0f76ea70f91499a Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Wed, 27 Sep 2023 15:04:04 +0200
Subject: [PATCH 19/20] Changed check if single child is a directory

---
 src/pymovements/utils/archives.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index 7f771fea2..d261f5d59 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -23,7 +23,6 @@
 import bz2
 import gzip
 import lzma
-import os
 import shutil
 import tarfile
 import zipfile
@@ -97,14 +96,14 @@ def extract_archive(
         # Check if top-level directory has a single child
         if len(children) == 1:
             single_child = children[0]
-            if os.path.isdir(single_child):
+            if Path.is_dir(Path(single_child)):
                 shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
                 shutil.rmtree(single_child)
         # Check if top-level directory has just the two children archive and extracted archive
         elif len(children) == 2 and destination_path == source_path.parent:
             # Name of extracted archive has no suffix
-            single_child = children[1] if (Path(children[0]).suffixes != []) else children[0]
-            if os.path.isdir(single_child):
+            single_child = children[0] if (Path(children[0]).suffixes == []) else children[1]
+            if Path.is_dir(Path(single_child)):
                 shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
                 shutil.rmtree(single_child)
 

From 3829464bd2593b58c5992d016e20ef7c2ac9db7b Mon Sep 17 00:00:00 2001
From: Josephine Funken <funken@uni-bremen.de>
Date: Thu, 12 Oct 2023 13:08:32 +0200
Subject: [PATCH 20/20] Changed functionality to remove top level and
 corresponding tests

---
 src/pymovements/utils/archives.py | 27 ++++++++++-----------
 tests/utils/archives_test.py      | 40 ++++++++++++++++++++-----------
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/src/pymovements/utils/archives.py b/src/pymovements/utils/archives.py
index d261f5d59..96a269095 100644
--- a/src/pymovements/utils/archives.py
+++ b/src/pymovements/utils/archives.py
@@ -23,6 +23,7 @@
 import bz2
 import gzip
 import lzma
+import os
 import shutil
 import tarfile
 import zipfile
@@ -92,20 +93,18 @@ def extract_archive(
         source_path.unlink()
 
     if remove_top_level:
-        children = [str(file) for file in destination_path.glob('*')]
-        # Check if top-level directory has a single child
-        if len(children) == 1:
-            single_child = children[0]
-            if Path.is_dir(Path(single_child)):
-                shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
-                shutil.rmtree(single_child)
-        # Check if top-level directory has just the two children archive and extracted archive
-        elif len(children) == 2 and destination_path == source_path.parent:
-            # Name of extracted archive has no suffix
-            single_child = children[0] if (Path(children[0]).suffixes == []) else children[1]
-            if Path.is_dir(Path(single_child)):
-                shutil.copytree(single_child, destination_path, dirs_exist_ok=True)
-                shutil.rmtree(single_child)
+        # path of extracted archive
+        extract_destination = destination_path / source_path.stem
+        if destination_path.stem == source_path.stem:
+            extract_destination = destination_path
+
+        # check if extracted archive has single child that is a directory
+        children = [str(file) for file in extract_destination.glob('*')]
+        if len(children) == 1 and os.path.isdir(children[0]):
+            # move contents of single child and remove it
+            for f in [str(file) for file in Path(children[0]).glob('*')]:
+                shutil.move(f, extract_destination)
+            Path(children[0]).rmdir()
 
     if recursive:
         # Get filepaths of all archives in extracted directory.
diff --git a/tests/utils/archives_test.py b/tests/utils/archives_test.py
index 05134a8d5..b3fe41fb1 100644
--- a/tests/utils/archives_test.py
+++ b/tests/utils/archives_test.py
@@ -86,12 +86,16 @@ def fixture_archive(request, tmp_path):
     test_filepath = rootpath / 'test.file'
     test_filepath.write_text('test')
 
+    single_child_directory = 'singlechild'
     top_level_directory = 'toplevel'
 
     # add additional archive
     filepath = rootpath / 'recursive.zip'
     with zipfile.ZipFile(filepath, 'w') as zip_open:
-        zip_open.write(test_filepath, arcname=test_filepath.name)
+        zip_open.write(
+            test_filepath,
+            arcname=os.path.join(single_child_directory, test_filepath.name),
+        )
 
     # declare archive path
     if compression is None:
@@ -228,7 +232,8 @@ def fixture_unsupported_archive(request, tmp_path):
                 'toplevel',
                 os.path.join('toplevel', 'recursive.zip'),
                 os.path.join('toplevel', 'recursive'),
-                os.path.join('toplevel', 'recursive', 'test.file'),
+                os.path.join('toplevel', 'recursive', 'singlechild'),
+                os.path.join('toplevel', 'recursive', 'singlechild', 'test.file'),
             ),
             id='recursive_true_remove_finished_false',
         ),
@@ -237,23 +242,26 @@ def fixture_unsupported_archive(request, tmp_path):
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive'),
-                os.path.join('toplevel', 'recursive', 'test.file'),
+                os.path.join('toplevel', 'recursive', 'singlechild'),
+                os.path.join('toplevel', 'recursive', 'singlechild', 'test.file'),
             ),
             id='recursive_true_remove_finished_true',
         ),
         pytest.param(
             False, False, True,
             (
-                'recursive.zip',
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
             ),
             id='recursive_false_remove_top_level_true',
         ),
         pytest.param(
             True, False, True,
             (
-                'recursive',
-                'recursive.zip',
-                os.path.join('recursive', 'test.file'),
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
+                os.path.join('toplevel', 'recursive'),
+                os.path.join('toplevel', 'recursive', 'test.file'),
             ),
             id='recursive_true_remove_top_level_true',
         ),
@@ -361,9 +369,10 @@ def test_extract_unsupported_archive_destination_path_None(
             True, False, False,
             (
                 'toplevel',
-                os.path.join('toplevel', 'recursive.zip'),
                 os.path.join('toplevel', 'recursive'),
-                os.path.join('toplevel', 'recursive', 'test.file'),
+                os.path.join('toplevel', 'recursive.zip'),
+                os.path.join('toplevel', 'recursive', 'singlechild'),
+                os.path.join('toplevel', 'recursive', 'singlechild', 'test.file'),
             ),
             id='recursive_true_remove_finished_false',
         ),
@@ -372,23 +381,26 @@ def test_extract_unsupported_archive_destination_path_None(
             (
                 'toplevel',
                 os.path.join('toplevel', 'recursive'),
-                os.path.join('toplevel', 'recursive', 'test.file'),
+                os.path.join('toplevel', 'recursive', 'singlechild'),
+                os.path.join('toplevel', 'recursive', 'singlechild', 'test.file'),
             ),
             id='recursive_true_remove_finished_true',
         ),
         pytest.param(
             False, False, True,
             (
-                'recursive.zip',
+                'toplevel',
+                os.path.join('toplevel', 'recursive.zip'),
             ),
             id='recursive_false_remove_top_level_true',
         ),
         pytest.param(
             True, False, True,
             (
-                'recursive',
-                'recursive.zip',
-                os.path.join('recursive', 'test.file'),
+                'toplevel',
+                os.path.join('toplevel', 'recursive'),
+                os.path.join('toplevel', 'recursive.zip'),
+                os.path.join('toplevel', 'recursive', 'test.file'),
             ),
             id='recursive_true_remove_top_level_true',
         ),