diff --git a/docs/getting-started/sportec.ipynb b/docs/getting-started/sportec.ipynb
index 399f8bb4..51227d12 100644
--- a/docs/getting-started/sportec.ipynb
+++ b/docs/getting-started/sportec.ipynb
@@ -7,13 +7,13 @@
"source": [
"# Sportec\n",
"\n",
- "## Load local files"
+ "## Load local event files"
]
},
{
"cell_type": "code",
"execution_count": 1,
- "id": "e9adb7cb",
+ "id": "4f6455fb",
"metadata": {},
"outputs": [
{
@@ -77,7 +77,7 @@
"
38.71 | \n",
" DFL-OBJ-0000ZS | \n",
" KICK_OFF | \n",
- " NaN | \n",
+ " None | \n",
" \n",
" \n",
" 1 | \n",
@@ -97,8 +97,8 @@
" NaN | \n",
" NaN | \n",
" DFL-OBJ-002G3I | \n",
- " NaN | \n",
- " NaN | \n",
+ " None | \n",
+ " None | \n",
"
\n",
" \n",
" 2 | \n",
@@ -119,7 +119,7 @@
" 28.58 | \n",
" DFL-OBJ-0027B9 | \n",
" THROW_IN | \n",
- " NaN | \n",
+ " None | \n",
"
\n",
" \n",
" 3 | \n",
@@ -138,8 +138,8 @@
" 28.58 | \n",
" NaN | \n",
" NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " None | \n",
+ " None | \n",
" RIGHT_FOOT | \n",
"
\n",
" \n",
@@ -160,8 +160,8 @@
" NaN | \n",
" NaN | \n",
" None | \n",
- " NaN | \n",
- " NaN | \n",
+ " None | \n",
+ " None | \n",
"
\n",
" \n",
"\n",
@@ -190,11 +190,11 @@
"4 8.72 4.21 NaN NaN \n",
"\n",
" receiver_player_id set_piece_type body_part_type \n",
- "0 DFL-OBJ-0000ZS KICK_OFF NaN \n",
- "1 DFL-OBJ-002G3I NaN NaN \n",
- "2 DFL-OBJ-0027B9 THROW_IN NaN \n",
- "3 NaN NaN RIGHT_FOOT \n",
- "4 None NaN NaN "
+ "0 DFL-OBJ-0000ZS KICK_OFF None \n",
+ "1 DFL-OBJ-002G3I None None \n",
+ "2 DFL-OBJ-0027B9 THROW_IN None \n",
+ "3 None None RIGHT_FOOT \n",
+ "4 None None None "
]
},
"execution_count": 1,
@@ -205,7 +205,7 @@
"source": [
"from kloppy import sportec\n",
"\n",
- "dataset = sportec.load(\n",
+ "dataset = sportec.load_event(\n",
" event_data=\"../../kloppy/tests/files/sportec_events.xml\",\n",
" meta_data=\"../../kloppy/tests/files/sportec_meta.xml\",\n",
" \n",
@@ -216,6 +216,258 @@
"\n",
"dataset.to_df().head()"
]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "81989fc6",
+ "metadata": {},
+ "source": [
+ "## Load local tracking files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "958f17ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " period_id | \n",
+ " timestamp | \n",
+ " frame_id | \n",
+ " ball_state | \n",
+ " ball_owning_team_id | \n",
+ " ball_x | \n",
+ " ball_y | \n",
+ " ball_z | \n",
+ " ball_speed | \n",
+ " DFL-OBJ-002G3I_x | \n",
+ " ... | \n",
+ " DFL-OBJ-002G3I_d | \n",
+ " DFL-OBJ-002G3I_s | \n",
+ " DFL-OBJ-002G5S_x | \n",
+ " DFL-OBJ-002G5S_y | \n",
+ " DFL-OBJ-002G5S_d | \n",
+ " DFL-OBJ-002G5S_s | \n",
+ " DFL-OBJ-002FVJ_x | \n",
+ " DFL-OBJ-002FVJ_y | \n",
+ " DFL-OBJ-002FVJ_d | \n",
+ " DFL-OBJ-002FVJ_s | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0.00 | \n",
+ " 10000 | \n",
+ " dead | \n",
+ " DFL-CLU-000004 | \n",
+ " 2.69 | \n",
+ " 0.26 | \n",
+ " 0.06 | \n",
+ " 0.00 | \n",
+ " 0.35 | \n",
+ " ... | \n",
+ " None | \n",
+ " 0.00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.04 | \n",
+ " 10001 | \n",
+ " alive | \n",
+ " DFL-CLU-00000A | \n",
+ " 3.41 | \n",
+ " 0.26 | \n",
+ " 0.08 | \n",
+ " 65.59 | \n",
+ " 0.34 | \n",
+ " ... | \n",
+ " None | \n",
+ " 1.74 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0.08 | \n",
+ " 10002 | \n",
+ " alive | \n",
+ " DFL-CLU-000004 | \n",
+ " 4.22 | \n",
+ " 0.33 | \n",
+ " 0.09 | \n",
+ " 65.16 | \n",
+ " 0.32 | \n",
+ " ... | \n",
+ " None | \n",
+ " 1.76 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 0.12 | \n",
+ " 10003 | \n",
+ " alive | \n",
+ " DFL-CLU-000004 | \n",
+ " 5.02 | \n",
+ " 0.38 | \n",
+ " 0.09 | \n",
+ " 74.34 | \n",
+ " 0.31 | \n",
+ " ... | \n",
+ " None | \n",
+ " 1.78 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 0.16 | \n",
+ " 10004 | \n",
+ " alive | \n",
+ " DFL-CLU-000004 | \n",
+ " 5.79 | \n",
+ " 0.44 | \n",
+ " 0.08 | \n",
+ " 73.58 | \n",
+ " 0.29 | \n",
+ " ... | \n",
+ " None | \n",
+ " 1.80 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " period_id timestamp frame_id ball_state ball_owning_team_id ball_x \\\n",
+ "0 1 0.00 10000 dead DFL-CLU-000004 2.69 \n",
+ "1 1 0.04 10001 alive DFL-CLU-00000A 3.41 \n",
+ "2 1 0.08 10002 alive DFL-CLU-000004 4.22 \n",
+ "3 1 0.12 10003 alive DFL-CLU-000004 5.02 \n",
+ "4 1 0.16 10004 alive DFL-CLU-000004 5.79 \n",
+ "\n",
+ " ball_y ball_z ball_speed DFL-OBJ-002G3I_x ... DFL-OBJ-002G3I_d \\\n",
+ "0 0.26 0.06 0.00 0.35 ... None \n",
+ "1 0.26 0.08 65.59 0.34 ... None \n",
+ "2 0.33 0.09 65.16 0.32 ... None \n",
+ "3 0.38 0.09 74.34 0.31 ... None \n",
+ "4 0.44 0.08 73.58 0.29 ... None \n",
+ "\n",
+ " DFL-OBJ-002G3I_s DFL-OBJ-002G5S_x DFL-OBJ-002G5S_y DFL-OBJ-002G5S_d \\\n",
+ "0 0.00 NaN NaN None \n",
+ "1 1.74 NaN NaN None \n",
+ "2 1.76 NaN NaN None \n",
+ "3 1.78 NaN NaN None \n",
+ "4 1.80 NaN NaN None \n",
+ "\n",
+ " DFL-OBJ-002G5S_s DFL-OBJ-002FVJ_x DFL-OBJ-002FVJ_y DFL-OBJ-002FVJ_d \\\n",
+ "0 NaN NaN NaN None \n",
+ "1 NaN NaN NaN None \n",
+ "2 NaN NaN NaN None \n",
+ "3 NaN NaN NaN None \n",
+ "4 NaN NaN NaN None \n",
+ "\n",
+ " DFL-OBJ-002FVJ_s \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ "[5 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from kloppy import sportec\n",
+ "\n",
+ "\n",
+ "dataset = sportec.load_tracking(\n",
+ " raw_data=\"../../kloppy/tests/files/sportec_positional.xml\",\n",
+ " meta_data=\"../../kloppy/tests/files/sportec_meta.xml\",\n",
+ " \n",
+ " # Optional arguments\n",
+ " coordinates=\"sportec\",\n",
+ " only_alive=False\n",
+ ")\n",
+ "\n",
+ "dataset.to_df().head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "366f1126",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py
index 9426fcde..2aff6625 100644
--- a/kloppy/_providers/sportec.py
+++ b/kloppy/_providers/sportec.py
@@ -1,17 +1,22 @@
from typing import Optional, List
from kloppy.config import get_config
-from kloppy.domain import EventDataset, EventFactory
+from kloppy.domain import EventDataset, EventFactory, TrackingDataset
from kloppy.infra.serializers.event.sportec import (
- SportecEventDeserializer,
- SportecInputs,
+ SportecEventDataDeserializer,
+ SportecEventDataInputs,
)
-from kloppy.io import open_as_file
+from kloppy.infra.serializers.tracking.sportec import (
+ SportecTrackingDataDeserializer,
+ SportecTrackingDataInputs,
+)
+from kloppy.io import open_as_file, FileLike
+from kloppy.utils import deprecated
-def load(
- event_data: str,
- meta_data: str,
+def load_event(
+ event_data: FileLike,
+ meta_data: FileLike,
event_types: Optional[List[str]] = None,
coordinates: Optional[str] = None,
event_factory: Optional[EventFactory] = None,
@@ -27,7 +32,7 @@ def load(
event_factory:
"""
- serializer = SportecEventDeserializer(
+ serializer = SportecEventDataDeserializer(
event_types=event_types,
coordinate_system=coordinates,
event_factory=event_factory or get_config("event_factory"),
@@ -36,5 +41,44 @@ def load(
meta_data
) as meta_data_fp:
return serializer.deserialize(
- SportecInputs(event_data=event_data_fp, meta_data=meta_data_fp)
+ SportecEventDataInputs(
+ event_data=event_data_fp, meta_data=meta_data_fp
+ )
+ )
+
+
+def load_tracking(
+    meta_data: FileLike,
+    raw_data: FileLike,
+    sample_rate: Optional[float] = None,
+    limit: Optional[int] = None,
+    coordinates: Optional[str] = None,
+    only_alive: Optional[bool] = True,
+) -> TrackingDataset:
+    """
+    Load Sportec tracking data into a `TrackingDataset`.
+
+    Parameters:
+        meta_data: file (name or file-like) with the match information XML
+        raw_data: file (name or file-like) with the positional XML
+        sample_rate: forwarded to the deserializer to thin out the frames
+        limit: forwarded to the deserializer to cap the number of frames
+        coordinates: name of the coordinate system to transform the data to
+        only_alive: when true, frames in which the ball is not in play are
+            dropped by the deserializer
+    """
+    tracking_deserializer = SportecTrackingDataDeserializer(
+        limit=limit,
+        sample_rate=sample_rate,
+        coordinate_system=coordinates,
+        only_alive=only_alive,
+    )
+    # Both inputs stay open only for the duration of the deserialization.
+    with open_as_file(meta_data) as meta_fp, open_as_file(
+        raw_data
+    ) as raw_fp:
+        tracking_inputs = SportecTrackingDataInputs(
+            meta_data=meta_fp, raw_data=raw_fp
+        )
+        return tracking_deserializer.deserialize(
+            inputs=tracking_inputs
         )
+
+
+@deprecated("sportec.load_event should be used")
+def load(
+    event_data: FileLike,
+    meta_data: FileLike,
+    event_types: Optional[List[str]] = None,
+    coordinates: Optional[str] = None,
+    event_factory: Optional[EventFactory] = None,
+) -> EventDataset:
+    """
+    Deprecated alias of `load_event`, kept for backwards compatibility.
+
+    All arguments are handed to `load_event` unchanged.
+    """
+    return load_event(
+        event_data=event_data,
+        meta_data=meta_data,
+        event_types=event_types,
+        coordinates=coordinates,
+        event_factory=event_factory,
+    )
diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py
index 4b60b893..dabad000 100644
--- a/kloppy/domain/models/common.py
+++ b/kloppy/domain/models/common.py
@@ -17,6 +17,7 @@
Iterable,
)
+
if sys.version_info >= (3, 8):
from typing import Literal
else:
@@ -518,7 +519,7 @@ def pitch_dimensions(self) -> PitchDimensions:
@dataclass
-class SportecCoordinateSystem(CoordinateSystem):
+class SportecEventDataCoordinateSystem(CoordinateSystem):
@property
def provider(self) -> Provider:
return Provider.SPORTEC
@@ -541,6 +542,30 @@ def pitch_dimensions(self) -> PitchDimensions:
)
+@dataclass
+class SportecTrackingDataCoordinateSystem(CoordinateSystem):
+    """
+    Coordinate system of Sportec tracking data.
+
+    The origin is the center of the pitch and the y-axis runs from bottom
+    to top, so x spans [-length / 2, length / 2] and y spans
+    [-width / 2, width / 2].
+    """
+
+    @property
+    def provider(self) -> Provider:
+        return Provider.SPORTEC
+
+    @property
+    def origin(self) -> Origin:
+        return Origin.CENTER
+
+    @property
+    def vertical_orientation(self) -> VerticalOrientation:
+        return VerticalOrientation.BOTTOM_TO_TOP
+
+    @property
+    def pitch_dimensions(self) -> PitchDimensions:
+        # The pitch is centered on the origin: half of it lies on each side.
+        half_length = self.length / 2
+        half_width = self.width / 2
+        return PitchDimensions(
+            x_dim=Dimension(-half_length, half_length),
+            y_dim=Dimension(-half_width, half_width),
+            length=self.length,
+            width=self.width,
+        )
+
+
@dataclass
class StatsBombCoordinateSystem(CoordinateSystem):
@property
@@ -654,7 +679,27 @@ def pitch_dimensions(self) -> PitchDimensions:
)
-def build_coordinate_system(provider: Provider, **kwargs):
+class DatasetType(Enum):
+    """
+    Kind of data a dataset holds.
+
+    Attributes:
+        TRACKING (DatasetType): tracking data
+        EVENT (DatasetType): event data
+        CODE (DatasetType): code data
+    """
+
+    TRACKING = "TRACKING"
+    EVENT = "EVENT"
+    CODE = "CODE"
+
+    def __repr__(self) -> str:
+        # Show the bare value (e.g. TRACKING) instead of <DatasetType.TRACKING: ...>.
+        return self.value
+
+
+def build_coordinate_system(
+ provider: Provider, dataset_type: DatasetType = DatasetType.EVENT, **kwargs
+):
if provider == Provider.TRACAB:
return TracabCoordinateSystem(normalized=False, **kwargs)
@@ -668,7 +713,12 @@ def build_coordinate_system(provider: Provider, **kwargs):
return OptaCoordinateSystem(normalized=False, **kwargs)
if provider == Provider.SPORTEC:
- return SportecCoordinateSystem(normalized=False, **kwargs)
+ if dataset_type == DatasetType.TRACKING:
+ return SportecTrackingDataCoordinateSystem(
+ normalized=False, **kwargs
+ )
+ else:
+ return SportecEventDataCoordinateSystem(normalized=False, **kwargs)
if provider == Provider.STATSBOMB:
return StatsBombCoordinateSystem(normalized=False, **kwargs)
@@ -795,24 +845,6 @@ class Metadata:
attributes: Optional[Dict] = field(default_factory=dict, compare=False)
-class DatasetType(Enum):
- """
- DatasetType
-
- Attributes:
- TRACKING (DatasetType):
- EVENT (DatasetType):
- CODE (DatasetType):
- """
-
- TRACKING = "TRACKING"
- EVENT = "EVENT"
- CODE = "CODE"
-
- def __repr__(self):
- return self.value
-
-
T = TypeVar("T", bound="DataRecord")
@@ -838,6 +870,9 @@ def __iter__(self):
def __getitem__(self, item):
return self.records[item]
+    def __len__(self):
+        """Return the number of records held by this dataset."""
+        return len(self.records)
+
def __post_init__(self):
for i, record in enumerate(self.records):
record.set_refs(
diff --git a/kloppy/domain/services/__init__.py b/kloppy/domain/services/__init__.py
index 3e3884d0..404a7f8c 100644
--- a/kloppy/domain/services/__init__.py
+++ b/kloppy/domain/services/__init__.py
@@ -2,7 +2,7 @@
from kloppy.domain import AttackingDirection, Frame, Ground
-from .transformers import DatasetTransformer
+from .transformers import DatasetTransformer, DatasetTransformerBuilder
from .event_factory import EventFactory, create_event
# NOT YET: from .enrichers import TrackingPossessionEnricher
diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py
index 41ba032f..92dac6b3 100644
--- a/kloppy/domain/services/transformers/__init__.py
+++ b/kloppy/domain/services/transformers/__init__.py
@@ -1 +1 @@
-from .dataset import DatasetTransformer
+from .dataset import DatasetTransformer, DatasetTransformerBuilder
diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py
index 09bdcf02..8bdff050 100644
--- a/kloppy/domain/services/transformers/attribute.py
+++ b/kloppy/domain/services/transformers/attribute.py
@@ -274,6 +274,7 @@ def __call__(self, frame: Frame) -> Dict[str, Any]:
ball_z=getattr(frame.ball_coordinates, "z", None)
if frame.ball_coordinates
else None,
+ ball_speed=frame.ball_speed,
)
for player, player_data in frame.players_data.items():
row.update(
diff --git a/kloppy/domain/services/transformers/dataset.py b/kloppy/domain/services/transformers/dataset.py
index c8407655..3ddb1d46 100644
--- a/kloppy/domain/services/transformers/dataset.py
+++ b/kloppy/domain/services/transformers/dataset.py
@@ -1,4 +1,5 @@
from dataclasses import fields, replace
+
from kloppy.domain.models.tracking import PlayerData
from typing import Union, Optional
@@ -15,6 +16,9 @@
Team,
TrackingDataset,
CoordinateSystem,
+ Provider,
+ build_coordinate_system,
+ DatasetType,
)
from kloppy.domain.models.event import Event
from kloppy.exceptions import KloppyError
@@ -431,3 +435,57 @@ def transform_dataset(
)
else:
raise KloppyError("Unknown Dataset type")
+
+
+class DatasetTransformerBuilder:
+    """
+    Build `DatasetTransformer`s towards a fixed target coordinate system.
+
+    The target is chosen once, at construction time. The source coordinate
+    system and the pitch size are supplied per dataset through `build`.
+    """
+
+    def __init__(
+        self, to_coordinate_system: Optional[Union[str, Provider]] = None
+    ):
+        """
+        Parameters:
+            to_coordinate_system: target coordinate system, given as a
+                `Provider`, a provider name, or "<provider>:<dataset type>"
+                (both parts are matched case-insensitively). When omitted,
+                the configured "coordinate_system" is used and, failing
+                that, `Provider.KLOPPY`.
+        """
+        # Imported here rather than at module level, as in the original code.
+        from kloppy.config import get_config
+
+        target = to_coordinate_system or get_config("coordinate_system")
+        target = target or Provider.KLOPPY
+
+        target_dataset_type = None
+        if isinstance(target, str):
+            if ":" not in target:
+                target = Provider[target.upper()]
+            else:
+                provider_name, dataset_type_name = target.split(":")
+                target = Provider[provider_name.upper()]
+                target_dataset_type = DatasetType[dataset_type_name.upper()]
+
+        self.to_coordinate_system = target
+        self.to_dataset_type = target_dataset_type
+
+    def build(
+        self,
+        length: float,
+        width: float,
+        provider: Provider,
+        dataset_type: DatasetType,
+    ):
+        """
+        Create the transformer for one dataset.
+
+        Parameters:
+            length: pitch length
+            width: pitch width
+            provider: provider whose coordinate system the data is in
+            dataset_type: type of the dataset being transformed; also used
+                for the target when no dataset type was given explicitly
+        """
+        pitch_size = dict(length=length, width=width)
+
+        source_system = build_coordinate_system(
+            provider,
+            dataset_type=dataset_type,
+            **pitch_size,
+        )
+        target_system = build_coordinate_system(
+            self.to_coordinate_system,
+            dataset_type=self.to_dataset_type or dataset_type,
+            **pitch_size,
+        )
+
+        return DatasetTransformer(
+            from_coordinate_system=source_system,
+            to_coordinate_system=target_system,
+        )
diff --git a/kloppy/infra/serializers/event/deserializer.py b/kloppy/infra/serializers/event/deserializer.py
index cec06b42..ce80705b 100644
--- a/kloppy/infra/serializers/event/deserializer.py
+++ b/kloppy/infra/serializers/event/deserializer.py
@@ -1,15 +1,15 @@
from abc import ABC, abstractmethod
from typing import Optional, List, Generic, TypeVar, Union
-from kloppy.config import get_config
from kloppy.domain import (
EventDataset,
Event,
EventType,
DatasetTransformer,
Provider,
- build_coordinate_system,
EventFactory,
+ DatasetType,
+ DatasetTransformerBuilder,
)
T = TypeVar("T")
@@ -32,13 +32,7 @@ def __init__(
for event_type in event_types
]
- if not coordinate_system:
- coordinate_system = get_config("coordinate_system")
-
- if isinstance(coordinate_system, str):
- coordinate_system = Provider[coordinate_system.upper()]
-
- self.coordinate_system = coordinate_system
+ self.transformer_builder = DatasetTransformerBuilder(coordinate_system)
if not event_factory:
event_factory = EventFactory()
@@ -50,23 +44,13 @@ def should_include_event(self, event: Event) -> bool:
return event.event_type in self.event_types
def get_transformer(
- self, length: float, width: float
+ self, length: float, width: float, provider: Optional[Provider] = None
) -> DatasetTransformer:
- from_coordinate_system = build_coordinate_system(
- self.provider,
- length=length,
- width=width,
- )
-
- to_coordinate_system = build_coordinate_system(
- self.coordinate_system,
+ return self.transformer_builder.build(
length=length,
width=width,
- )
-
- return DatasetTransformer(
- from_coordinate_system=from_coordinate_system,
- to_coordinate_system=to_coordinate_system,
+ provider=provider or self.provider,
+ dataset_type=DatasetType.EVENT,
)
@property
diff --git a/kloppy/infra/serializers/event/sportec/__init__.py b/kloppy/infra/serializers/event/sportec/__init__.py
index d6faa3f4..1d3665c1 100644
--- a/kloppy/infra/serializers/event/sportec/__init__.py
+++ b/kloppy/infra/serializers/event/sportec/__init__.py
@@ -1 +1 @@
-from .deserializer import SportecEventDeserializer, SportecInputs
+from .deserializer import SportecEventDataDeserializer, SportecEventDataInputs
diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py
index 0f08c543..87cabab9 100644
--- a/kloppy/infra/serializers/event/sportec/deserializer.py
+++ b/kloppy/infra/serializers/event/sportec/deserializer.py
@@ -1,5 +1,5 @@
from collections import OrderedDict
-from typing import Tuple, Dict, List, NamedTuple, IO
+from typing import Dict, List, NamedTuple, IO
import logging
from dateutil.parser import parse
from lxml import objectify
@@ -12,9 +12,6 @@
BallState,
DatasetFlag,
Orientation,
- PassEvent,
- ShotEvent,
- GenericEvent,
PassResult,
ShotResult,
EventType,
@@ -29,12 +26,7 @@
BodyPartQualifier,
BodyPart,
Qualifier,
- BallOutEvent,
- RecoveryEvent,
- SubstitutionEvent,
- CardEvent,
CardType,
- FoulCommittedEvent,
AttackingDirection,
)
from kloppy.exceptions import DeserializationError
@@ -74,6 +66,117 @@ def _team_from_xml_elm(team_elm) -> Team:
return team
+SPORTEC_FPS = 25
+
+# Sportec uses fixed starting frame ids for each half.
+SPORTEC_FIRST_HALF_STARTING_FRAME_ID = 10_000
+SPORTEC_SECOND_HALF_STARTING_FRAME_ID = 100_000
+SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID = 200_000
+SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID = 250_000
+
+
+class SportecMetadata(NamedTuple):
+    """Match information shared by the Sportec event and tracking parsers."""
+
+    # Final result of the match.
+    score: Score
+    # Home team first, away team second.
+    teams: List[Team]
+    # Periods derived from the fixed starting frame ids and total times.
+    periods: List[Period]
+    # Pitch size as read from the PitchX / PitchY attributes.
+    x_max: float
+    y_max: float
+    # Frame rate of the tracking data.
+    fps: int
+
+
+def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
+    """
+    Parse the Sportec match information XML into a `SportecMetadata`.
+
+    The match information file is shared between event and tracking data,
+    so both deserializers use this function. In the future it might move
+    to a common sportec package.
+
+    Raises:
+        DeserializationError: when a team has an unknown role, when the home
+            or away team is missing, or when a line-up is empty.
+    """
+    environment = match_root.MatchInformation.Environment.attrib
+    x_max = float(environment["PitchX"])
+    y_max = float(environment["PitchY"])
+
+    teams_elm = objectify.ObjectPath(
+        "PutDataRequest.MatchInformation.Teams"
+    ).find(match_root)
+
+    home_team = away_team = None
+    for team_elm in list(teams_elm.iterchildren("Team")):
+        role = team_elm.attrib["Role"]
+        if role == "home":
+            home_team = _team_from_xml_elm(team_elm)
+        elif role == "guest":
+            away_team = _team_from_xml_elm(team_elm)
+        else:
+            raise DeserializationError(f"Unknown side: {role}")
+
+    if not home_team:
+        raise DeserializationError("Home team is missing from metadata")
+    if not away_team:
+        raise DeserializationError("Away team is missing from metadata")
+
+    # The result is formatted as "<home>:<away>".
+    result = match_root.MatchInformation.General.attrib["Result"]
+    home_score, away_score = result.split(":")
+    score = Score(home=int(home_score), away=int(away_score))
+    teams = [home_team, away_team]
+
+    if len(home_team.players) == 0 or len(away_team.players) == 0:
+        raise DeserializationError("LineUp incomplete")
+
+    # The periods can be rebuilt from event data, so the event deserializer
+    # can ignore the periods built here. Tracking data does need them.
+    other_game_information = (
+        match_root.MatchInformation.OtherGameInformation.attrib
+    )
+
+    def _period(period_id, starting_frame_id, total_time_key):
+        # A period starts at its fixed starting frame id and lasts for the
+        # total time (in milliseconds) stated in the match information.
+        start_timestamp = starting_frame_id / SPORTEC_FPS
+        return Period(
+            id=period_id,
+            start_timestamp=start_timestamp,
+            end_timestamp=start_timestamp
+            + float(other_game_information[total_time_key]) / 1000,
+        )
+
+    periods = [
+        _period(
+            1, SPORTEC_FIRST_HALF_STARTING_FRAME_ID, "TotalTimeFirstHalf"
+        ),
+        _period(
+            2, SPORTEC_SECOND_HALF_STARTING_FRAME_ID, "TotalTimeSecondHalf"
+        ),
+    ]
+
+    if "TotalTimeFirstHalfExtra" in other_game_information:
+        # Extra time always adds two periods.
+        periods += [
+            _period(
+                3,
+                SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID,
+                "TotalTimeFirstHalfExtra",
+            ),
+            _period(
+                4,
+                SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID,
+                "TotalTimeSecondHalfExtra",
+            ),
+        ]
+
+    return SportecMetadata(
+        score=score,
+        teams=teams,
+        periods=periods,
+        x_max=x_max,
+        y_max=y_max,
+        fps=SPORTEC_FPS,
+    )
+
+
def _event_chain_from_xml_elm(event_elm):
chain = OrderedDict()
current_elm = event_elm
@@ -259,55 +362,29 @@ def _parse_coordinates(event_attributes: Dict) -> Point:
)
-class SportecInputs(NamedTuple):
+class SportecEventDataInputs(NamedTuple):
meta_data: IO[bytes]
event_data: IO[bytes]
-class SportecEventDeserializer(EventDataDeserializer[SportecInputs]):
+class SportecEventDataDeserializer(
+ EventDataDeserializer[SportecEventDataInputs]
+):
@property
def provider(self) -> Provider:
return Provider.SPORTEC
- def deserialize(self, inputs: SportecInputs) -> EventDataset:
+ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
with performance_logging("load data", logger=logger):
match_root = objectify.fromstring(inputs.meta_data.read())
event_root = objectify.fromstring(inputs.event_data.read())
with performance_logging("parse data", logger=logger):
- x_max = float(
- match_root.MatchInformation.Environment.attrib["PitchX"]
+ sportec_metadata = sportec_metadata_from_xml_elm(match_root)
+ teams = home_team, away_team = sportec_metadata.teams
+ transformer = self.get_transformer(
+ length=sportec_metadata.x_max, width=sportec_metadata.y_max
)
- y_max = float(
- match_root.MatchInformation.Environment.attrib["PitchY"]
- )
-
- transformer = self.get_transformer(length=x_max, width=y_max)
-
- team_path = objectify.ObjectPath(
- "PutDataRequest.MatchInformation.Teams"
- )
- team_elms = list(team_path.find(match_root).iterchildren("Team"))
-
- for team_elm in team_elms:
- if team_elm.attrib["Role"] == "home":
- home_team = _team_from_xml_elm(team_elm)
- elif team_elm.attrib["Role"] == "guest":
- away_team = _team_from_xml_elm(team_elm)
- else:
- raise DeserializationError(
- f"Unknown side: {team_elm.attrib['Role']}"
- )
-
- (
- home_score,
- away_score,
- ) = match_root.MatchInformation.General.attrib["Result"].split(":")
- score = Score(home=int(home_score), away=int(away_score))
- teams = [home_team, away_team]
-
- if len(home_team.players) == 0 or len(away_team.players) == 0:
- raise DeserializationError("LineUp incomplete")
periods = []
period_id = 0
@@ -518,7 +595,7 @@ def deserialize(self, inputs: SportecInputs) -> EventDataset:
teams=teams,
periods=periods,
pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
- score=score,
+ score=sportec_metadata.score,
frame_rate=None,
orientation=orientation,
flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
diff --git a/kloppy/infra/serializers/tracking/deserializer.py b/kloppy/infra/serializers/tracking/deserializer.py
index cc370cba..0635566c 100644
--- a/kloppy/infra/serializers/tracking/deserializer.py
+++ b/kloppy/infra/serializers/tracking/deserializer.py
@@ -4,8 +4,9 @@
from kloppy.domain import (
Provider,
TrackingDataset,
- build_coordinate_system,
DatasetTransformer,
+ DatasetTransformerBuilder,
+ DatasetType,
)
T = TypeVar("T")
@@ -26,32 +27,16 @@ def __init__(
sample_rate = 1.0
self.sample_rate = sample_rate
- if not coordinate_system:
- coordinate_system = Provider.KLOPPY
-
- if isinstance(coordinate_system, str):
- coordinate_system = Provider[coordinate_system.upper()]
-
- self.coordinate_system = coordinate_system
+ self.transformer_builder = DatasetTransformerBuilder(coordinate_system)
def get_transformer(
self, length: float, width: float, provider: Optional[Provider] = None
) -> DatasetTransformer:
- from_coordinate_system = build_coordinate_system(
- provider or self.provider,
- length=length,
- width=width,
- )
-
- to_coordinate_system = build_coordinate_system(
- self.coordinate_system,
+ return self.transformer_builder.build(
length=length,
width=width,
- )
-
- return DatasetTransformer(
- from_coordinate_system=from_coordinate_system,
- to_coordinate_system=to_coordinate_system,
+ provider=provider or self.provider,
+ dataset_type=DatasetType.TRACKING,
)
@property
diff --git a/kloppy/infra/serializers/tracking/sportec/__init__.py b/kloppy/infra/serializers/tracking/sportec/__init__.py
new file mode 100644
index 00000000..245f9471
--- /dev/null
+++ b/kloppy/infra/serializers/tracking/sportec/__init__.py
@@ -0,0 +1,4 @@
+from .deserializer import (
+ SportecTrackingDataDeserializer,
+ SportecTrackingDataInputs,
+)
diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py
new file mode 100644
index 00000000..45b05b1f
--- /dev/null
+++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py
@@ -0,0 +1,232 @@
+import logging
+from collections import defaultdict
+from typing import NamedTuple, Optional, Union, IO
+
+from lxml import objectify
+
+from kloppy.domain import (
+ TrackingDataset,
+ DatasetFlag,
+ AttackingDirection,
+ Frame,
+ Point,
+ Point3D,
+ BallState,
+ Period,
+ Orientation,
+ attacking_direction_from_frame,
+ Metadata,
+ Provider,
+ PlayerData,
+)
+
+from kloppy.utils import performance_logging
+
+from ..deserializer import TrackingDataDeserializer
+from kloppy.infra.serializers.event.sportec.deserializer import (
+ sportec_metadata_from_xml_elm,
+)
+
+logger = logging.getLogger(__name__)
+
+PERIOD_ID_TO_GAME_SECTION = {
+ 1: "firstHalf",
+ 2: "secondHalf",
+ 3: "firstHalfExtra",
+ 4: "secondHalfExtra",
+}
+
+
+def _read_section_data(data_root, period: Period) -> dict:
+    """
+    Collect the raw frames of a single period, grouped by frame id.
+
+    Every FrameSet of the period's game section belongs either to the ball
+    (TeamId "BALL", stored under the key 'ball') or to one person (stored
+    under its PersonId). The stored values are the XML attributes of the
+    Frame elements, so everything in them is still a string.
+
+    Output format:
+    {
+        10_000: {
+            'ball': {
+                'N': "10000",
+                'X': "20.92",
+                'Y': "2.84",
+                'Z': "0.08",
+                'S': "4.91",
+                'BallPossession': "2",
+                'BallStatus': "1"
+                [...]
+            },
+            'DFL-OBJ-002G3I': {
+                'N': "10000",
+                'X': "0.35",
+                'Y': "-25.26",
+                'S': "0.00",
+                [...]
+            },
+            [....]
+        },
+        10_001: {
+            ...
+        }
+    }
+    """
+
+    section_name = PERIOD_ID_TO_GAME_SECTION[period.id]
+
+    frames_by_id = defaultdict(dict)
+    for frame_set in data_root.findall(
+        f"Positions/FrameSet[@GameSection='{section_name}']"
+    ):
+        set_attributes = frame_set.attrib
+        if set_attributes["TeamId"] == "BALL":
+            owner_key = "ball"
+        else:
+            owner_key = set_attributes["PersonId"]
+
+        for frame_elm in frame_set.iterchildren("Frame"):
+            frame_attributes = frame_elm.attrib
+            frames_by_id[int(frame_attributes["N"])][
+                owner_key
+            ] = frame_attributes
+
+    return frames_by_id
+
+
+class SportecTrackingDataInputs(NamedTuple):
+    """Open files needed to deserialize Sportec tracking data."""
+
+    # Match information XML.
+    meta_data: IO[bytes]
+    # Positional (tracking) XML.
+    raw_data: IO[bytes]
+
+
+class SportecTrackingDataDeserializer(TrackingDataDeserializer):
+    """Deserialize Sportec positional (tracking) XML into a `TrackingDataset`."""
+
+    @property
+    def provider(self) -> Provider:
+        return Provider.SPORTEC
+
+    def __init__(
+        self,
+        limit: Optional[int] = None,
+        sample_rate: Optional[float] = None,
+        coordinate_system: Optional[Union[str, Provider]] = None,
+        only_alive: Optional[bool] = True,
+    ):
+        """
+        Parameters:
+            limit: maximum number of frames to load; no limit when falsy
+            sample_rate: used to thin out the frames (see `deserialize`)
+            coordinate_system: coordinate system to transform the frames to
+            only_alive: when true, frames whose BallStatus is not "1" are
+                skipped
+        """
+        super().__init__(limit, sample_rate, coordinate_system)
+        self.only_alive = only_alive
+
+    def deserialize(
+        self, inputs: SportecTrackingDataInputs
+    ) -> TrackingDataset:
+        """
+        Build a `TrackingDataset` from the match information and positional XML.
+
+        Frames without ball data are skipped, as are frames in which the
+        ball is not alive when `only_alive` is set. At most `limit` frames
+        are returned.
+        """
+        with performance_logging("load data", logger=logger):
+            match_root = objectify.fromstring(inputs.meta_data.read())
+            data_root = objectify.fromstring(inputs.raw_data.read())
+
+        with performance_logging("parse metadata", logger=logger):
+            sportec_metadata = sportec_metadata_from_xml_elm(match_root)
+            teams = home_team, away_team = sportec_metadata.teams
+            periods = sportec_metadata.periods
+            transformer = self.get_transformer(
+                length=sportec_metadata.x_max, width=sportec_metadata.y_max
+            )
+
+        with performance_logging("parse raw data", logger=logger):
+
+            def _iter():
+                player_map = {}
+                for player in home_team.players:
+                    player_map[player.player_id] = player
+                for player in away_team.players:
+                    player_map[player.player_id] = player
+
+                sample = 1.0 / self.sample_rate
+
+                for period in periods:
+                    raw_frames = _read_section_data(data_root, period)
+
+                    # Sort on frame id so the frames are emitted in
+                    # chronological order, whatever order the FrameSets
+                    # (ball, players, substitutes) appear in in the file.
+                    for i, (frame_id, frame_data) in enumerate(
+                        sorted(raw_frames.items())
+                    ):
+                        if "ball" not in frame_data:
+                            # Frames without ball data are corrupt.
+                            continue
+
+                        ball_data = frame_data["ball"]
+                        if self.only_alive and ball_data["BallStatus"] != "1":
+                            continue
+
+                        if i % sample == 0:
+                            yield Frame(
+                                frame_id=frame_id,
+                                timestamp=(
+                                    (
+                                        frame_id
+                                        # Subtract the period's starting frame
+                                        # before dividing, so the timestamp is
+                                        # relative to the start of the period.
+                                        - period.start_timestamp
+                                        * sportec_metadata.fps
+                                    )
+                                    / sportec_metadata.fps
+                                ),
+                                # Any BallPossession other than "1" is
+                                # attributed to the away team.
+                                ball_owning_team=home_team
+                                if ball_data["BallPossession"] == "1"
+                                else away_team,
+                                ball_state=BallState.ALIVE
+                                if ball_data["BallStatus"] == "1"
+                                else BallState.DEAD,
+                                period=period,
+                                players_data={
+                                    player_map[player_id]: PlayerData(
+                                        coordinates=Point(
+                                            x=float(raw_player_data["X"]),
+                                            y=float(raw_player_data["Y"]),
+                                        ),
+                                        speed=float(raw_player_data["S"]),
+                                    )
+                                    for player_id, raw_player_data in frame_data.items()
+                                    if player_id != "ball"
+                                },
+                                other_data={},
+                                ball_coordinates=Point3D(
+                                    x=float(ball_data["X"]),
+                                    y=float(ball_data["Y"]),
+                                    z=float(ball_data["Z"]),
+                                ),
+                                ball_speed=float(ball_data["S"]),
+                            )
+
+            frames = []
+            for n, frame in enumerate(_iter()):
+                frame = transformer.transform_frame(frame)
+
+                frames.append(frame)
+
+                if not frame.period.attacking_direction_set:
+                    frame.period.set_attacking_direction(
+                        attacking_direction=attacking_direction_from_frame(
+                            frame
+                        )
+                    )
+
+                # n is zero-based and this frame has already been appended,
+                # so n + 1 frames are stored at this point.
+                if self.limit and n + 1 >= self.limit:
+                    break
+
+        orientation = (
+            Orientation.FIXED_HOME_AWAY
+            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
+            else Orientation.FIXED_AWAY_HOME
+        )
+
+        metadata = Metadata(
+            teams=teams,
+            periods=periods,
+            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
+            score=sportec_metadata.score,
+            frame_rate=sportec_metadata.fps,
+            orientation=orientation,
+            provider=Provider.SPORTEC,
+            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
+            coordinate_system=transformer.get_to_coordinate_system(),
+        )
+
+        return TrackingDataset(
+            records=frames,
+            metadata=metadata,
+        )
diff --git a/kloppy/sportec.py b/kloppy/sportec.py
index 27cf35f6..79595791 100644
--- a/kloppy/sportec.py
+++ b/kloppy/sportec.py
@@ -1 +1 @@
-from ._providers.sportec import load
+from ._providers.sportec import load, load_event, load_tracking
diff --git a/kloppy/tests/files/sportec_positional.xml b/kloppy/tests/files/sportec_positional.xml
new file mode 100644
index 00000000..8e85056d
--- /dev/null
+++ b/kloppy/tests/files/sportec_positional.xml
@@ -0,0 +1,512 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py
index d320b6fa..c687ed2a 100644
--- a/kloppy/tests/test_helpers.py
+++ b/kloppy/tests/test_helpers.py
@@ -296,6 +296,7 @@ def test_to_pandas(self):
"ball_x": {0: 100, 1: 0},
"ball_y": {0: -50, 1: 50},
"ball_z": {0: 0, 1: 1},
+ "ball_speed": {0: None, 1: None},
"home_1_x": {0: None, 1: 15.0},
"home_1_y": {0: None, 1: 35.0},
"home_1_d": {0: None, 1: 0.03},
@@ -348,6 +349,7 @@ def test_to_pandas_additional_columns(self):
"ball_x": [100, 0],
"ball_y": [-50, 50],
"ball_z": [0, 1],
+ "ball_speed": [None, None],
"match": ["test", "test"],
"bonus_column": [11, 12],
"home_1_x": [None, 15],
diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py
index c33b789e..95951cf7 100644
--- a/kloppy/tests/test_sportec.py
+++ b/kloppy/tests/test_sportec.py
@@ -11,12 +11,14 @@
SetPieceType,
BodyPart,
DatasetType,
+ BallState,
+ Point3D,
)
from kloppy import sportec
-class TestSportecEvent:
+class TestSportecEventData:
""""""
@pytest.fixture
@@ -27,9 +29,13 @@ def event_data(self, base_dir) -> str:
def meta_data(self, base_dir) -> str:
return base_dir / "files/sportec_meta.xml"
- def test_correct_deserialization(self, event_data: Path, meta_data: Path):
- dataset = sportec.load(
- event_data=event_data, meta_data=meta_data, coordinates="sportec"
+ def test_correct_event_data_deserialization(
+ self, event_data: Path, meta_data: Path
+ ):
+ dataset = sportec.load_event(
+ event_data=event_data,
+ meta_data=meta_data,
+ coordinates="sportec",
)
assert dataset.metadata.provider == Provider.SPORTEC
@@ -69,9 +75,92 @@ def test_correct_deserialization(self, event_data: Path, meta_data: Path):
assert dataset.events[0].coordinates == Point(56.41, 68.0)
- def test_correct_normalized_deserialization(
+ def test_correct_normalized_event_data_deserialization(
self, event_data: Path, meta_data: Path
):
- dataset = sportec.load(event_data=event_data, meta_data=meta_data)
+ dataset = sportec.load_event(
+ event_data=event_data, meta_data=meta_data
+ )
assert dataset.events[0].coordinates == Point(0.5640999999999999, 1)
+
+
+class TestSportecTrackingData:
+    """
+    Tests for loading Sportec tracking data via `sportec.load_tracking`.
+
+    The fixtures resolve `files/sportec_positional.xml` (tracking frames)
+    and `files/sportec_meta.xml` (match metadata) relative to `base_dir`.
+    """
+
+    @pytest.fixture
+    def raw_data(self, base_dir) -> Path:
+        """Location of the positional (tracking) XML test file."""
+        return base_dir / "files/sportec_positional.xml"
+
+    @pytest.fixture
+    def meta_data(self, base_dir) -> Path:
+        """Location of the match metadata XML test file."""
+        return base_dir / "files/sportec_meta.xml"
+
+    def test_load_metadata(self, raw_data: Path, meta_data: Path):
+        """Provider, dataset type and number of periods are set."""
+        dataset = sportec.load_tracking(
+            raw_data=raw_data, meta_data=meta_data, coordinates="sportec"
+        )
+
+        assert dataset.metadata.provider == Provider.SPORTEC
+        assert dataset.dataset_type == DatasetType.TRACKING
+        assert len(dataset.metadata.periods) == 2
+
+    def test_load_frames(self, raw_data: Path, meta_data: Path):
+        """Check ball, player and timestamp values with `only_alive=False`."""
+        dataset = sportec.load_tracking(
+            raw_data=raw_data,
+            meta_data=meta_data,
+            coordinates="sportec",
+            only_alive=False,
+        )
+        home_team, away_team = dataset.metadata.teams
+
+        assert dataset.frames[0].timestamp == 0.0
+        assert dataset.frames[0].ball_owning_team == away_team
+        assert dataset.frames[0].ball_state == BallState.DEAD
+        assert dataset.frames[0].ball_coordinates == Point3D(
+            x=2.69, y=0.26, z=0.06
+        )
+        assert dataset.frames[1].ball_speed == 65.59
+
+        assert dataset.frames[1].ball_owning_team == home_team
+        assert dataset.frames[1].ball_state == BallState.ALIVE
+
+        player_lilian = away_team.get_player_by_id("DFL-OBJ-002G3I")
+        player_data = dataset.frames[0].players_data[player_lilian]
+
+        assert player_data.coordinates == Point(x=0.35, y=-25.26)
+
+        # Distance is not loaded for now. The provider describes it as
+        # "The distance covered from the previous frame in cm", which doesn't
+        # work together with `sample_rate`.
+        assert player_data.distance is None
+
+        # Appears first in 27th frame
+        player_bensebaini = away_team.get_player_by_id("DFL-OBJ-002G5S")
+        assert player_bensebaini not in dataset.frames[0].players_data
+        assert player_bensebaini in dataset.frames[26].players_data
+
+        # Contains all 3 players
+        assert len(dataset.frames[35].players_data) == 3
+        assert len(dataset) == 202
+
+        # The first frame that belongs to the second period must have its
+        # timestamp reset to 0; only that first match is inspected.
+        second_period = dataset.metadata.periods[1]
+        for frame in dataset:
+            if frame.period == second_period:
+                assert (
+                    frame.timestamp == 0
+                ), "First frame must start at timestamp 0.0"
+                break
+        else:
+            # The loop never hit `break`: no frame belongs to the second
+            # period. Fail explicitly so the report carries a message.
+            pytest.fail("No data found in second half")
+
+    def test_load_only_alive_frames(self, raw_data: Path, meta_data: Path):
+        """With `only_alive=True` the dataset holds 199 frames (202 otherwise)."""
+        dataset = sportec.load_tracking(
+            raw_data=raw_data,
+            meta_data=meta_data,
+            coordinates="sportec",
+            only_alive=True,
+        )
+        assert len(dataset) == 199