Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yaml public datasets loader #914

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ repos:
rev: v1.13.0
hooks:
- id: mypy
additional_dependencies: [pandas-stubs, types-tqdm]
additional_dependencies: [pandas-stubs, types-tqdm, types-PyYAML]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion docs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2023-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024 The pymovements Project Authors
# Copyright (c) 2022-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
4 changes: 1 addition & 3 deletions src/pymovements/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024 The pymovements Project Authors
# Copyright (c) 2022-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -33,7 +33,6 @@
from pymovements.dataset import DatasetDefinition
from pymovements.dataset import DatasetLibrary
from pymovements.dataset import DatasetPaths
from pymovements.dataset import register_dataset
from pymovements.events import EventDataFrame
from pymovements.events import EventGazeProcessor
from pymovements.events import EventProcessor
Expand All @@ -52,7 +51,6 @@
'DatasetLibrary',
'DatasetPaths',
'datasets',
'register_dataset',

'events',
'EventDataFrame',
Expand Down
4 changes: 1 addition & 3 deletions src/pymovements/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024 The pymovements Project Authors
# Copyright (c) 2022-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -33,7 +33,6 @@
from pymovements.dataset.dataset import Dataset
from pymovements.dataset.dataset_definition import DatasetDefinition
from pymovements.dataset.dataset_library import DatasetLibrary
from pymovements.dataset.dataset_library import register_dataset
from pymovements.dataset.dataset_paths import DatasetPaths


Expand All @@ -42,5 +41,4 @@
'DatasetDefinition',
'DatasetLibrary',
'DatasetPaths',
'register_dataset',
]
36 changes: 22 additions & 14 deletions src/pymovements/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024 The pymovements Project Authors
# Copyright (c) 2022-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -33,6 +33,7 @@
from pymovements.dataset.dataset_definition import DatasetDefinition
from pymovements.dataset.dataset_library import DatasetLibrary
from pymovements.dataset.dataset_paths import DatasetPaths
from pymovements.dataset.yaml_dataset_loader import YAMLDatasetLoader
from pymovements.events.frame import EventDataFrame
from pymovements.events.precomputed import PrecomputedEventDataFrame
from pymovements.events.processing import EventGazeProcessor
Expand All @@ -47,35 +48,42 @@

Parameters
----------
definition: str | DatasetDefinition | type[DatasetDefinition]
definition: str | DatasetDefinition | Path
Dataset definition to initialize dataset with.
path : str | Path | DatasetPaths
Path to the dataset directory. You can set up a custom directory structure by passing a
:py:class:`~pymovements.DatasetPaths` instance.
path: str | Path | DatasetPaths
Path to the dataset directory. You can set up a custom directory structure
by passing a :py:class:`~pymovements.DatasetPaths` instance.
"""

def __init__(
self,
definition: str | DatasetDefinition | type[DatasetDefinition],
path: str | Path | DatasetPaths,
self,
definition: str | DatasetDefinition | Path,
path: str | Path | DatasetPaths,
):
self.fileinfo: pl.DataFrame = pl.DataFrame()
self.gaze: list[GazeDataFrame] = []
self.events: list[EventDataFrame] = []
self.precomputed_events: list[PrecomputedEventDataFrame] = []
self.precomputed_reading_measures: list[ReadingMeasures] = []

if isinstance(definition, str):
definition = DatasetLibrary.get(definition)()
if isinstance(definition, type):
definition = definition()
self.definition = deepcopy(definition)
# Handle different definition input types
if isinstance(definition, (str, Path)):
# Check if it's a path to a YAML file
if isinstance(definition, Path) or str(definition).endswith('.yaml'):
self.definition = YAMLDatasetLoader.load_dataset_definition(definition)

Check warning on line 73 in src/pymovements/dataset/dataset.py

View check run for this annotation

Codecov / codecov/patch

src/pymovements/dataset/dataset.py#L73

Added line #L73 was not covered by tests
else:
# Try to load from registered datasets
self.definition = DatasetLibrary.get(definition)

Check warning on line 76 in src/pymovements/dataset/dataset.py

View check run for this annotation

Codecov / codecov/patch

src/pymovements/dataset/dataset.py#L76

Added line #L76 was not covered by tests
else:
self.definition = deepcopy(definition)

Check warning on line 78 in src/pymovements/dataset/dataset.py

View check run for this annotation

Codecov / codecov/patch

src/pymovements/dataset/dataset.py#L78

Added line #L78 was not covered by tests

# Handle path setup
if isinstance(path, (str, Path)):
self.paths = DatasetPaths(root=path, dataset='.')
else:
self.paths = deepcopy(path)
# Fill dataset directory name with dataset definition name if specified.

# Fill dataset directory name with dataset definition name if specified
self.paths.fill_name(self.definition.name)

def load(
Expand Down
2 changes: 1 addition & 1 deletion src/pymovements/dataset/dataset_definition.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2023-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion src/pymovements/dataset/dataset_download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2023-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion src/pymovements/dataset/dataset_files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2023-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
85 changes: 52 additions & 33 deletions src/pymovements/dataset/dataset_library.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2023-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -20,65 +20,84 @@
"""DatasetLibrary module."""
from __future__ import annotations

from typing import TypeVar
from pathlib import Path

from pymovements.dataset.dataset_definition import DatasetDefinition
from pymovements.dataset.yaml_dataset_loader import YAMLDatasetLoader


class DatasetLibrary:
"""Provides access by name to :py:class:`~pymovements.DatasetDefinition`.
"""Provides access by name to dataset definitions.

Attributes
----------
definitions: dict[str, type[DatasetDefinition]]
Dictionary of :py:class:`~pymovements.DatasetDefinition`.
definitions: dict[str, DatasetDefinition]
Dictionary of dataset definition instances, keyed by dataset name.
"""

definitions: dict[str, type[DatasetDefinition]] = {}
definitions: dict[str, DatasetDefinition] = {}

@classmethod
def add(cls, definition: type[DatasetDefinition]) -> None:
"""Add :py:class:`~pymovements.DatasetDefinition` to library.
def add(cls, definition: DatasetDefinition | Path | str) -> None:
"""Add a dataset definition to library.

Parameters
----------
definition: type[DatasetDefinition]
The :py:class:`~pymovements.DatasetDefinition` to add to the library.
definition: DatasetDefinition | Path | str
The dataset definition to add. Can be:
- A DatasetDefinition class (legacy)
- A DatasetDefinition instance (from YAML)
- A Path to a YAML file
- A string path to a YAML file
"""
cls.definitions[definition.name] = definition
if isinstance(definition, (str, Path)):
# Load from YAML file
yaml_def = YAMLDatasetLoader.load_dataset_definition(definition)
cls.definitions[yaml_def.name] = yaml_def
else:
# DatasetDefinition instance (from YAML)
cls.definitions[definition.name] = definition

Check warning on line 59 in src/pymovements/dataset/dataset_library.py

View check run for this annotation

Codecov / codecov/patch

src/pymovements/dataset/dataset_library.py#L59

Added line #L59 was not covered by tests

@classmethod
def get(cls, name: str) -> type[DatasetDefinition]:
"""Get :py:class:`~pymovements.DatasetDefinition` py name.
def get(cls, name: str) -> DatasetDefinition:
"""Get dataset definition by name.

Parameters
----------
name: str
Name of the :py:class:`~pymovements.DatasetDefinition` in the library.
Name of the dataset definition in the library.

Returns
-------
type[DatasetDefinition]
The :py:class:`~pymovements.DatasetDefinition` in the library.
DatasetDefinition
The dataset definition instance registered under the given name.

Raises
------
KeyError
If dataset name not found in library.
"""
if name not in cls.definitions:
raise KeyError(

Check warning on line 81 in src/pymovements/dataset/dataset_library.py

View check run for this annotation

Codecov / codecov/patch

src/pymovements/dataset/dataset_library.py#L81

Added line #L81 was not covered by tests
f"Dataset '{name}' not found in library. "
f"Available datasets: {list(cls.definitions.keys())}",
)
return cls.definitions[name]

@classmethod
def register_yaml_directory(cls, directory: str | Path) -> None:
"""Register all YAML dataset definitions in a directory.

DatsetDefinitionClass = TypeVar('DatsetDefinitionClass', bound=type[DatasetDefinition])


def register_dataset(cls: DatsetDefinitionClass) -> DatsetDefinitionClass:
"""Register a public dataset definition.

Parameters
----------
cls: DatsetDefinitionClass
The :py:class:`~pymovements.DatasetDefinition` to register.
Parameters
----------
directory: str | Path
Directory containing YAML dataset definitions
"""
directory = Path(directory)
for yaml_file in directory.glob('*.yaml'):
cls.add(yaml_file)

Returns
-------
DatsetDefinitionClass
The :py:class:`~pymovements.DatasetDefinition` to register.
"""
DatasetLibrary.add(cls)
return cls
@classmethod
def clear(cls) -> None:
"""Clear all registered datasets."""
cls.definitions.clear()

Check warning on line 103 in src/pymovements/dataset/dataset_library.py

View check run for this annotation

Codecov / codecov/patch

src/pymovements/dataset/dataset_library.py#L103

Added line #L103 was not covered by tests
2 changes: 1 addition & 1 deletion src/pymovements/dataset/dataset_paths.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024 The pymovements Project Authors
# Copyright (c) 2023-2025 The pymovements Project Authors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
Loading
Loading