Merge pull request #10 from au-imclab/dev

Skeleton is looking less like an archeological finding...
au-imclab · Dec 4, 2023 · bac2970 · bac2970
2 parents 35d8ab0 + 5c9a591
commit bac2970
Show file tree

Hide file tree

Showing 32 changed files with 1,613 additions and 50 deletions.
diff --git a/.github/workflows/lint-test-pr-dev.yml b/.github/workflows/lint-test-pr-dev.yml
@@ -1,4 +1,4 @@
-name: Test, Build, Publish
+name: Test and Lint (PRs and dev branch)
 
 on:
   pull_request:

diff --git a/pyproject.toml b/pyproject.toml
@@ -75,6 +75,8 @@ dependencies = [
   "mypy>=1.0.0",
   "ruff>=0.0.243",
   "pandas",
+  "pandas-stubs",
+  "types_openpyxl",
   "StrEnum; python_version < '3.11'",
 ]
 [tool.hatch.envs.lint.scripts]

diff --git a/src/mopipe/analysis/pipeline.py b/src/mopipe/analysis/pipeline.py
@@ -0,0 +1,79 @@
+"""pipeline.py
+
+This module contains the Pipeline class, which is used to run a series of
+analysis steps (segments) on the data.
+"""
+
+import typing as t
+
+from mopipe.segments import Segment
+
+
+class Pipeline(t.MutableSequence[Segment]):
+    """Pipeline
+
+    An ordered, mutable sequence of segments that are run one after another
+    on the data. Each segment is called with the arguments given to ``run``,
+    and its return value replaces ``kwargs["input"]`` for the next segment.
+    """
+
+    _segments: t.MutableSequence[Segment]
+
+    def __init__(self, segments: t.Optional[t.MutableSequence[Segment]] = None) -> None:
+        """Initialize a Pipeline.
+
+        Parameters
+        ----------
+        segments : MutableSequence[Segment], optional
+            Initial segments. The sequence is stored as given (not copied);
+            a new empty list is used when omitted.
+        """
+        self._segments = [] if segments is None else segments
+
+    @property
+    def segments(self) -> t.MutableSequence[Segment]:
+        """The segments in the pipeline."""
+        return self._segments
+
+    def _check_kwargs(self, **kwargs) -> None:
+        """Check the arguments for the pipeline.
+
+        Raises
+        ------
+        ValueError
+            If no ``input`` keyword argument was provided.
+        """
+        if "input" not in kwargs:
+            msg = "No input provided to pipeline."
+            raise ValueError(msg)
+
+    def segment(self, index: int) -> Segment:
+        """Get a segment from the pipeline."""
+        return self._segments[index]
+
+    def add_segment(self, segment: Segment) -> int:
+        """Add a segment to the end of the pipeline and return its index."""
+        self._segments.append(segment)
+        return len(self._segments) - 1
+
+    def run(self, *args, **kwargs) -> t.Any:
+        """Run the pipeline.
+
+        Every segment is called with the same positional arguments and the
+        current keyword arguments; the value it returns becomes the
+        ``input`` keyword argument of the next segment.
+
+        Returns
+        -------
+        Any
+            The output of the last segment, or None if the pipeline has no
+            segments.
+
+        Raises
+        ------
+        ValueError
+            If no ``input`` keyword argument was provided.
+        """
+        self._check_kwargs(**kwargs)
+        output = None
+        for segment in self._segments:
+            # most basic version here
+            # we could also keep track of the output from each step
+            # if that is useful, for now it's just I -> Segment -> O -> Segment -> O -> ...
+            output = segment(*args, **kwargs)
+            kwargs["input"] = output
+        return output
+
+    def __repr__(self) -> str:
+        return f"Pipeline(segments={self._segments})"
+
+    @t.overload
+    def __getitem__(self, index: int) -> Segment:
+        ...
+
+    @t.overload
+    def __getitem__(self, index: slice) -> t.MutableSequence[Segment]:
+        ...
+
+    def __getitem__(self, index: t.Union[int, slice]):
+        return self._segments[index]
+
+    # __setitem__, __delitem__ and insert are abstract in MutableSequence;
+    # without them the class cannot be instantiated.
+    @t.overload
+    def __setitem__(self, index: int, value: Segment) -> None:
+        ...
+
+    @t.overload
+    def __setitem__(self, index: slice, value: t.Iterable[Segment]) -> None:
+        ...
+
+    def __setitem__(self, index, value) -> None:
+        self._segments[index] = value
+
+    def __delitem__(self, index: t.Union[int, slice]) -> None:
+        del self._segments[index]
+
+    def insert(self, index: int, value: Segment) -> None:
+        """Insert a segment before the given index."""
+        self._segments.insert(index, value)
+
+    def __len__(self) -> int:
+        return len(self._segments)
+
+    def __iter__(self) -> t.Iterator[Segment]:
+        return iter(self._segments)
+
+    def __reversed__(self) -> t.Iterator[Segment]:
+        return reversed(self._segments)
+
+    def __contains__(self, value: object) -> bool:
+        return value in self._segments
diff --git a/src/mopipe/analysis/py.typed b/src/mopipe/analysis/py.typed
diff --git a/src/mopipe/common/__init__.py b/src/mopipe/common/__init__.py
@@ -1,2 +1,3 @@
 from .datastructs import DataLevel  # noqa: F401, TID252, I001
-from .datastructs import MocapMetadata  # noqa: F401, TID252
+from .datastructs import MocapMetadataEntries  # noqa: F401, TID252
+from .util import maybe_generate_id  # noqa: F401, TID252
diff --git a/src/mopipe/common/datastructs.py b/src/mopipe/common/datastructs.py
@@ -5,15 +5,14 @@
 
 import sys
 
+# Python 3.11 has built-in StrEnum
 if sys.version_info >= (3, 11):
-    from enum import IntEnum, StrEnum
+    from enum import EnumMeta, IntEnum, StrEnum
 else:
-    from enum import IntEnum
+    from enum import EnumMeta, IntEnum
 
     from strenum import StrEnum
 
-from pandas import DataFrame
-
 
 class DataLevel(IntEnum):
     """DataLevel
@@ -27,21 +26,49 @@ class DataLevel(IntEnum):
     SUBJECT = 2
 
 
-class MocapMetadata(StrEnum):
+class EnumContainsMeta(EnumMeta):
+    """EnumContainsMeta
+
+    A metaclass for enums with string values. It overrides the
+    __contains__ method, which allows checking if a string is a valid
+    member name or member value of the enum.
+
+    It also overrides the __getitem__ method, which allows getting the
+    value of a member from its name, or if you already pass in a value,
+    it will return the value.
+    """
+
+    def __contains__(self, item: object) -> bool:
+        # Non-string items fall back to the standard enum membership test.
+        if not isinstance(item, str):
+            return super().__contains__(item)
+        try:
+            self[item]
+        except KeyError:
+            return False
+        else:
+            return True
+
+    def __getitem__(self, item: str) -> str:  # type: ignore
+        if not isinstance(item, str):
+            return str(super().__getitem__(item))
+        # A member name maps to that member's value.
+        if item in self._member_map_:
+            return str(self._member_map_[item].value)
+        # A member value is returned unchanged.
+        if item in self._value2member_map_:
+            return item
+        # self is the enum class here, so report its name, not the metaclass name.
+        msg = f"{item} is not a valid member of {self.__name__}"
+        raise KeyError(msg)
+
+
+class MocapMetadataEntries(StrEnum, metaclass=EnumContainsMeta):
     """MocapMetadata
 
     Common metadata for all MoCap data, and their transformed names.
     This allows a common interface for all MoCap data.
     """
 
-    cam_count = ("n_cameras",)
-    frame_count = ("n_frames",)
-    marker_names = ("marker_names",)
-    marker_count = ("n_markers",)
-    sample_rate = ("sample_rate",)
-    time_stamp = ("time_stamp",)
-
-
-class MocapTimeSeries:
-    tsdata: DataFrame
-    metadata: MocapMetadata
+    cam_count = "n_cameras"
+    frame_count = "n_frames"
+    marker_names = "marker_names"
+    marker_count = "n_markers"
+    sample_rate = "sample_rate"
+    time_stamp = "time_stamp"
diff --git a/src/mopipe/common/py.typed b/src/mopipe/common/py.typed
diff --git a/src/mopipe/common/util.py b/src/mopipe/common/util.py
@@ -0,0 +1,38 @@
+"""util.py
+
+Common utility functions.
+"""
+
+import typing as t
+from uuid import uuid4
+
+
+def maybe_generate_id(
+    _id: t.Optional[str] = None, prefix: t.Optional[str] = None, suffix: t.Optional[str] = None
+) -> str:
+    """Return the given id, or build a random one when none is given.
+
+    A caller-supplied id is returned untouched (prefix and suffix are
+    ignored in that case). Otherwise a fresh UUID4 string is generated and
+    the prefix and suffix, when provided, are attached to it with
+    underscores.
+
+    Parameters
+    ----------
+    _id : str, optional
+        The id to use.
+    prefix : str, optional
+        The prefix to use for the id.
+    suffix : str, optional
+        The suffix to use for the id.
+
+    Returns
+    -------
+    str
+        The id.
+    """
+    if _id is not None:
+        return _id
+    pieces = [str(uuid4())]
+    if prefix is not None:
+        pieces.insert(0, prefix)
+    if suffix is not None:
+        pieces.append(suffix)
+    return "_".join(pieces)
diff --git a/src/mopipe/data/__init__.py b/src/mopipe/data/__init__.py
@@ -1 +1,10 @@
+from .empirical import (  # noqa: TID252, F401
+    DiscreteData,
+    EmpiricalData,
+    MetaData,
+    MocapMetaData,
+    MocapTimeSeries,
+    TimeseriesData,
+)
+from .experiment import Experiment, ExperimentLevel, Trial  # noqa: TID252, F401
 from .reader import AbstractReader, MocapReader  # noqa: TID252, F401
diff --git a/src/mopipe/data/collator.py b/src/mopipe/data/collator.py
@@ -8,7 +8,7 @@
 
 from pandas import DataFrame
 
-from mopipe.data.reader import AbstractReader
+from mopipe.data import AbstractReader
 
 
 class MocapDataCollator:

diff --git a/src/mopipe/data/empirical.py b/src/mopipe/data/empirical.py
@@ -0,0 +1,98 @@
+"""This contains base classes for defining data associated with experiments"""
+
+import typing as t
+
+from pandas import DataFrame, Series
+
+from mopipe.common import MocapMetadataEntries, maybe_generate_id
+
+if t.TYPE_CHECKING:
+    from mopipe.data import ExperimentLevel
+
+
+class MetaData(dict):
+    """MetaData
+
+    A plain dictionary that serves as the common base class for the
+    metadata attached to data.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Create the metadata; arguments are passed straight to dict."""
+        super().__init__(*args, **kwargs)
+
+
+class MocapMetaData(MetaData):
+    """MocapMetaData
+
+    This automatically transforms the keys of the metadata to the
+    known names in MocapMetadataEntries.
+
+    Keys are transformed when the object is created and on item
+    assignment and item lookup. Keys that are not known entries are
+    stored and looked up unchanged.
+    """
+
+    @staticmethod
+    def _transform_key(key: t.Any) -> t.Any:
+        """Return the MocapMetadataEntries name for key, or key itself if it is not a known entry."""
+        # Only strings can be entry names; other keys skip the enum lookup.
+        if isinstance(key, str) and key in MocapMetadataEntries:
+            return MocapMetadataEntries[key]
+        return key
+
+    def __init__(self, *args, **kwargs):
+        """Create the metadata from the same arguments dict accepts."""
+        super().__init__()
+        # Go through __setitem__ so every key is transformed. The initial
+        # items are collected in a separate dict, so nothing is mutated
+        # while it is being iterated.
+        for key, value in dict(*args, **kwargs).items():
+            self[key] = value
+
+    def __setitem__(self, key: str, value: t.Any):
+        super().__setitem__(self._transform_key(key), value)
+
+    def __getitem__(self, key: str):
+        return super().__getitem__(self._transform_key(key))
+
+
+class EmpiricalData:
+    """EmpiricalData
+
+    Common base class for empirical data: a DataFrame together with its
+    metadata, a name and an id.
+    """
+
+    data: DataFrame
+    metadata: MetaData
+    # Declared here only; __init__ does not assign it.
+    level: "ExperimentLevel"
+    name: str
+    data_id: str
+
+    def __init__(self, data: DataFrame, metadata: MetaData, name: str, data_id: t.Optional[str] = None):
+        """Store the data, metadata and name, and set the data id.
+
+        The id comes from maybe_generate_id, called with the given data_id
+        and the name as prefix.
+        """
+        self.name = name
+        self.data = data
+        self.metadata = metadata
+        self.data_id = maybe_generate_id(data_id, prefix=name)
+
+    def __getitem__(self, key: t.Union[str, int]) -> Series:
+        """Index the data: int keys go through .iloc, other keys index the DataFrame directly."""
+        if not isinstance(key, int):
+            return self.data[key]
+        return self.data.iloc[key]
+
+
+class DiscreteData(EmpiricalData):
+    """DiscreteData
+
+    Data that belongs to a level but is not a timeseries.
+    """
+
+
+class TimeseriesData(EmpiricalData):
+    """TimeseriesData
+
+    Timeseries data that belongs to a level.
+    """
+
+
+class MocapTimeSeries(TimeseriesData):
+    """MocapTimeSeries
+
+    Timeseries of Mocap data (i.e. 3D marker positions), with
+    MocapMetaData as its metadata.
+    """
+
+    metadata: MocapMetaData
+
+    def __init__(self, data: DataFrame, metadata: MocapMetaData, name: str, data_id: t.Optional[str] = None):
+        """Pass all arguments on to the parent initializer."""
+        super().__init__(data=data, metadata=metadata, name=name, data_id=data_id)