diff --git a/docs/getting-started/sportec.ipynb b/docs/getting-started/sportec.ipynb index 399f8bb4..51227d12 100644 --- a/docs/getting-started/sportec.ipynb +++ b/docs/getting-started/sportec.ipynb @@ -7,13 +7,13 @@ "source": [ "# Sportec\n", "\n", - "## Load local files" + "## Load local event files" ] }, { "cell_type": "code", "execution_count": 1, - "id": "e9adb7cb", + "id": "4f6455fb", "metadata": {}, "outputs": [ { @@ -77,7 +77,7 @@ " 38.71\n", " DFL-OBJ-0000ZS\n", " KICK_OFF\n", - " NaN\n", + " None\n", " \n", " \n", " 1\n", @@ -97,8 +97,8 @@ " NaN\n", " NaN\n", " DFL-OBJ-002G3I\n", - " NaN\n", - " NaN\n", + " None\n", + " None\n", " \n", " \n", " 2\n", @@ -119,7 +119,7 @@ " 28.58\n", " DFL-OBJ-0027B9\n", " THROW_IN\n", - " NaN\n", + " None\n", " \n", " \n", " 3\n", @@ -138,8 +138,8 @@ " 28.58\n", " NaN\n", " NaN\n", - " NaN\n", - " NaN\n", + " None\n", + " None\n", " RIGHT_FOOT\n", " \n", " \n", @@ -160,8 +160,8 @@ " NaN\n", " NaN\n", " None\n", - " NaN\n", - " NaN\n", + " None\n", + " None\n", " \n", " \n", "\n", @@ -190,11 +190,11 @@ "4 8.72 4.21 NaN NaN \n", "\n", " receiver_player_id set_piece_type body_part_type \n", - "0 DFL-OBJ-0000ZS KICK_OFF NaN \n", - "1 DFL-OBJ-002G3I NaN NaN \n", - "2 DFL-OBJ-0027B9 THROW_IN NaN \n", - "3 NaN NaN RIGHT_FOOT \n", - "4 None NaN NaN " + "0 DFL-OBJ-0000ZS KICK_OFF None \n", + "1 DFL-OBJ-002G3I None None \n", + "2 DFL-OBJ-0027B9 THROW_IN None \n", + "3 None None RIGHT_FOOT \n", + "4 None None None " ] }, "execution_count": 1, @@ -205,7 +205,7 @@ "source": [ "from kloppy import sportec\n", "\n", - "dataset = sportec.load(\n", + "dataset = sportec.load_event(\n", " event_data=\"../../kloppy/tests/files/sportec_events.xml\",\n", " meta_data=\"../../kloppy/tests/files/sportec_meta.xml\",\n", " \n", @@ -216,6 +216,258 @@ "\n", "dataset.to_df().head()" ] + }, + { + "cell_type": "markdown", + "id": "81989fc6", + "metadata": {}, + "source": [ + "# Load local tracking files" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "958f17ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
period_idtimestampframe_idball_stateball_owning_team_idball_xball_yball_zball_speedDFL-OBJ-002G3I_x...DFL-OBJ-002G3I_dDFL-OBJ-002G3I_sDFL-OBJ-002G5S_xDFL-OBJ-002G5S_yDFL-OBJ-002G5S_dDFL-OBJ-002G5S_sDFL-OBJ-002FVJ_xDFL-OBJ-002FVJ_yDFL-OBJ-002FVJ_dDFL-OBJ-002FVJ_s
010.0010000deadDFL-CLU-0000042.690.260.060.000.35...None0.00NaNNaNNoneNaNNaNNaNNoneNaN
110.0410001aliveDFL-CLU-00000A3.410.260.0865.590.34...None1.74NaNNaNNoneNaNNaNNaNNoneNaN
210.0810002aliveDFL-CLU-0000044.220.330.0965.160.32...None1.76NaNNaNNoneNaNNaNNaNNoneNaN
310.1210003aliveDFL-CLU-0000045.020.380.0974.340.31...None1.78NaNNaNNoneNaNNaNNaNNoneNaN
410.1610004aliveDFL-CLU-0000045.790.440.0873.580.29...None1.80NaNNaNNoneNaNNaNNaNNoneNaN
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " period_id timestamp frame_id ball_state ball_owning_team_id ball_x \\\n", + "0 1 0.00 10000 dead DFL-CLU-000004 2.69 \n", + "1 1 0.04 10001 alive DFL-CLU-00000A 3.41 \n", + "2 1 0.08 10002 alive DFL-CLU-000004 4.22 \n", + "3 1 0.12 10003 alive DFL-CLU-000004 5.02 \n", + "4 1 0.16 10004 alive DFL-CLU-000004 5.79 \n", + "\n", + " ball_y ball_z ball_speed DFL-OBJ-002G3I_x ... DFL-OBJ-002G3I_d \\\n", + "0 0.26 0.06 0.00 0.35 ... None \n", + "1 0.26 0.08 65.59 0.34 ... None \n", + "2 0.33 0.09 65.16 0.32 ... None \n", + "3 0.38 0.09 74.34 0.31 ... None \n", + "4 0.44 0.08 73.58 0.29 ... None \n", + "\n", + " DFL-OBJ-002G3I_s DFL-OBJ-002G5S_x DFL-OBJ-002G5S_y DFL-OBJ-002G5S_d \\\n", + "0 0.00 NaN NaN None \n", + "1 1.74 NaN NaN None \n", + "2 1.76 NaN NaN None \n", + "3 1.78 NaN NaN None \n", + "4 1.80 NaN NaN None \n", + "\n", + " DFL-OBJ-002G5S_s DFL-OBJ-002FVJ_x DFL-OBJ-002FVJ_y DFL-OBJ-002FVJ_d \\\n", + "0 NaN NaN NaN None \n", + "1 NaN NaN NaN None \n", + "2 NaN NaN NaN None \n", + "3 NaN NaN NaN None \n", + "4 NaN NaN NaN None \n", + "\n", + " DFL-OBJ-002FVJ_s \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from kloppy import sportec\n", + "\n", + "\n", + "dataset = sportec.load_tracking(\n", + " raw_data=\"../../kloppy/tests/files/sportec_positional.xml\",\n", + " meta_data=\"../../kloppy/tests/files/sportec_meta.xml\",\n", + " \n", + " # Optional arguments\n", + " coordinates=\"sportec\",\n", + " only_alive=False\n", + ")\n", + "\n", + "dataset.to_df().head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "366f1126", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py index 9426fcde..2aff6625 100644 --- a/kloppy/_providers/sportec.py +++ 
b/kloppy/_providers/sportec.py @@ -1,17 +1,22 @@ from typing import Optional, List from kloppy.config import get_config -from kloppy.domain import EventDataset, EventFactory +from kloppy.domain import EventDataset, EventFactory, TrackingDataset from kloppy.infra.serializers.event.sportec import ( - SportecEventDeserializer, - SportecInputs, + SportecEventDataDeserializer, + SportecEventDataInputs, ) -from kloppy.io import open_as_file +from kloppy.infra.serializers.tracking.sportec import ( + SportecTrackingDataDeserializer, + SportecTrackingDataInputs, +) +from kloppy.io import open_as_file, FileLike +from kloppy.utils import deprecated -def load( - event_data: str, - meta_data: str, +def load_event( + event_data: FileLike, + meta_data: FileLike, event_types: Optional[List[str]] = None, coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, @@ -27,7 +32,7 @@ def load( event_factory: """ - serializer = SportecEventDeserializer( + serializer = SportecEventDataDeserializer( event_types=event_types, coordinate_system=coordinates, event_factory=event_factory or get_config("event_factory"), @@ -36,5 +41,44 @@ def load( meta_data ) as meta_data_fp: return serializer.deserialize( - SportecInputs(event_data=event_data_fp, meta_data=meta_data_fp) + SportecEventDataInputs( + event_data=event_data_fp, meta_data=meta_data_fp + ) + ) + + +def load_tracking( + meta_data: FileLike, + raw_data: FileLike, + sample_rate: Optional[float] = None, + limit: Optional[int] = None, + coordinates: Optional[str] = None, + only_alive: Optional[bool] = True, +) -> TrackingDataset: + deserializer = SportecTrackingDataDeserializer( + sample_rate=sample_rate, + limit=limit, + coordinate_system=coordinates, + only_alive=only_alive, + ) + with open_as_file(meta_data) as meta_data_fp, open_as_file( + raw_data + ) as raw_data_fp: + return deserializer.deserialize( + inputs=SportecTrackingDataInputs( + meta_data=meta_data_fp, raw_data=raw_data_fp + ) ) + + 
+@deprecated("sportec.load_event should be used") +def load( + event_data: FileLike, + meta_data: FileLike, + event_types: Optional[List[str]] = None, + coordinates: Optional[str] = None, + event_factory: Optional[EventFactory] = None, +) -> EventDataset: + return load_event( + event_data, meta_data, event_types, coordinates, event_factory + ) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 4b60b893..dabad000 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -17,6 +17,7 @@ Iterable, ) + if sys.version_info >= (3, 8): from typing import Literal else: @@ -518,7 +519,7 @@ def pitch_dimensions(self) -> PitchDimensions: @dataclass -class SportecCoordinateSystem(CoordinateSystem): +class SportecEventDataCoordinateSystem(CoordinateSystem): @property def provider(self) -> Provider: return Provider.SPORTEC @@ -541,6 +542,30 @@ def pitch_dimensions(self) -> PitchDimensions: ) +@dataclass +class SportecTrackingDataCoordinateSystem(CoordinateSystem): + @property + def provider(self) -> Provider: + return Provider.SPORTEC + + @property + def origin(self) -> Origin: + return Origin.CENTER + + @property + def vertical_orientation(self) -> VerticalOrientation: + return VerticalOrientation.BOTTOM_TO_TOP + + @property + def pitch_dimensions(self) -> PitchDimensions: + return PitchDimensions( + x_dim=Dimension(-self.length / 2, self.length / 2), + y_dim=Dimension(-self.width / 2, self.width / 2), + length=self.length, + width=self.width, + ) + + @dataclass class StatsBombCoordinateSystem(CoordinateSystem): @property @@ -654,7 +679,27 @@ def pitch_dimensions(self) -> PitchDimensions: ) -def build_coordinate_system(provider: Provider, **kwargs): +class DatasetType(Enum): + """ + DatasetType + + Attributes: + TRACKING (DatasetType): + EVENT (DatasetType): + CODE (DatasetType): + """ + + TRACKING = "TRACKING" + EVENT = "EVENT" + CODE = "CODE" + + def __repr__(self): + return self.value + + +def build_coordinate_system( 
+ provider: Provider, dataset_type: DatasetType = DatasetType.EVENT, **kwargs +): if provider == Provider.TRACAB: return TracabCoordinateSystem(normalized=False, **kwargs) @@ -668,7 +713,12 @@ def build_coordinate_system(provider: Provider, **kwargs): return OptaCoordinateSystem(normalized=False, **kwargs) if provider == Provider.SPORTEC: - return SportecCoordinateSystem(normalized=False, **kwargs) + if dataset_type == DatasetType.TRACKING: + return SportecTrackingDataCoordinateSystem( + normalized=False, **kwargs + ) + else: + return SportecEventDataCoordinateSystem(normalized=False, **kwargs) if provider == Provider.STATSBOMB: return StatsBombCoordinateSystem(normalized=False, **kwargs) @@ -795,24 +845,6 @@ class Metadata: attributes: Optional[Dict] = field(default_factory=dict, compare=False) -class DatasetType(Enum): - """ - DatasetType - - Attributes: - TRACKING (DatasetType): - EVENT (DatasetType): - CODE (DatasetType): - """ - - TRACKING = "TRACKING" - EVENT = "EVENT" - CODE = "CODE" - - def __repr__(self): - return self.value - - T = TypeVar("T", bound="DataRecord") @@ -838,6 +870,9 @@ def __iter__(self): def __getitem__(self, item): return self.records[item] + def __len__(self): + return len(self.records) + def __post_init__(self): for i, record in enumerate(self.records): record.set_refs( diff --git a/kloppy/domain/services/__init__.py b/kloppy/domain/services/__init__.py index 3e3884d0..404a7f8c 100644 --- a/kloppy/domain/services/__init__.py +++ b/kloppy/domain/services/__init__.py @@ -2,7 +2,7 @@ from kloppy.domain import AttackingDirection, Frame, Ground -from .transformers import DatasetTransformer +from .transformers import DatasetTransformer, DatasetTransformerBuilder from .event_factory import EventFactory, create_event # NOT YET: from .enrichers import TrackingPossessionEnricher diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py index 41ba032f..92dac6b3 100644 --- 
a/kloppy/domain/services/transformers/__init__.py +++ b/kloppy/domain/services/transformers/__init__.py @@ -1 +1 @@ -from .dataset import DatasetTransformer +from .dataset import DatasetTransformer, DatasetTransformerBuilder diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index 09bdcf02..8bdff050 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -274,6 +274,7 @@ def __call__(self, frame: Frame) -> Dict[str, Any]: ball_z=getattr(frame.ball_coordinates, "z", None) if frame.ball_coordinates else None, + ball_speed=frame.ball_speed, ) for player, player_data in frame.players_data.items(): row.update( diff --git a/kloppy/domain/services/transformers/dataset.py b/kloppy/domain/services/transformers/dataset.py index c8407655..3ddb1d46 100644 --- a/kloppy/domain/services/transformers/dataset.py +++ b/kloppy/domain/services/transformers/dataset.py @@ -1,4 +1,5 @@ from dataclasses import fields, replace + from kloppy.domain.models.tracking import PlayerData from typing import Union, Optional @@ -15,6 +16,9 @@ Team, TrackingDataset, CoordinateSystem, + Provider, + build_coordinate_system, + DatasetType, ) from kloppy.domain.models.event import Event from kloppy.exceptions import KloppyError @@ -431,3 +435,57 @@ def transform_dataset( ) else: raise KloppyError("Unknown Dataset type") + + +class DatasetTransformerBuilder: + def __init__( + self, to_coordinate_system: Optional[Union[str, Provider]] = None + ): + from kloppy.config import get_config + + if not to_coordinate_system: + to_coordinate_system = get_config("coordinate_system") + + if not to_coordinate_system: + to_coordinate_system = Provider.KLOPPY + + to_dataset_type = None + if isinstance(to_coordinate_system, str): + if ":" in to_coordinate_system: + provider_name, dataset_type_name = to_coordinate_system.split( + ":" + ) + to_coordinate_system = Provider[provider_name.upper()] + 
to_dataset_type = DatasetType[dataset_type_name.upper()] + else: + to_coordinate_system = Provider[to_coordinate_system.upper()] + + self.to_coordinate_system = to_coordinate_system + self.to_dataset_type = to_dataset_type + + def build( + self, + length: float, + width: float, + provider: Provider, + dataset_type: DatasetType, + ): + from_coordinate_system = build_coordinate_system( + # This comment forces black to keep the arguments as multi-line + provider, + length=length, + width=width, + dataset_type=dataset_type, + ) + + to_coordinate_system = build_coordinate_system( + self.to_coordinate_system, + length=length, + width=width, + dataset_type=self.to_dataset_type or dataset_type, + ) + + return DatasetTransformer( + from_coordinate_system=from_coordinate_system, + to_coordinate_system=to_coordinate_system, + ) diff --git a/kloppy/infra/serializers/event/deserializer.py b/kloppy/infra/serializers/event/deserializer.py index cec06b42..ce80705b 100644 --- a/kloppy/infra/serializers/event/deserializer.py +++ b/kloppy/infra/serializers/event/deserializer.py @@ -1,15 +1,15 @@ from abc import ABC, abstractmethod from typing import Optional, List, Generic, TypeVar, Union -from kloppy.config import get_config from kloppy.domain import ( EventDataset, Event, EventType, DatasetTransformer, Provider, - build_coordinate_system, EventFactory, + DatasetType, + DatasetTransformerBuilder, ) T = TypeVar("T") @@ -32,13 +32,7 @@ def __init__( for event_type in event_types ] - if not coordinate_system: - coordinate_system = get_config("coordinate_system") - - if isinstance(coordinate_system, str): - coordinate_system = Provider[coordinate_system.upper()] - - self.coordinate_system = coordinate_system + self.transformer_builder = DatasetTransformerBuilder(coordinate_system) if not event_factory: event_factory = EventFactory() @@ -50,23 +44,13 @@ def should_include_event(self, event: Event) -> bool: return event.event_type in self.event_types def get_transformer( - self, length: 
float, width: float + self, length: float, width: float, provider: Optional[Provider] = None ) -> DatasetTransformer: - from_coordinate_system = build_coordinate_system( - self.provider, - length=length, - width=width, - ) - - to_coordinate_system = build_coordinate_system( - self.coordinate_system, + return self.transformer_builder.build( length=length, width=width, - ) - - return DatasetTransformer( - from_coordinate_system=from_coordinate_system, - to_coordinate_system=to_coordinate_system, + provider=provider or self.provider, + dataset_type=DatasetType.EVENT, ) @property diff --git a/kloppy/infra/serializers/event/sportec/__init__.py b/kloppy/infra/serializers/event/sportec/__init__.py index d6faa3f4..1d3665c1 100644 --- a/kloppy/infra/serializers/event/sportec/__init__.py +++ b/kloppy/infra/serializers/event/sportec/__init__.py @@ -1 +1 @@ -from .deserializer import SportecEventDeserializer, SportecInputs +from .deserializer import SportecEventDataDeserializer, SportecEventDataInputs diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 0f08c543..87cabab9 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from typing import Tuple, Dict, List, NamedTuple, IO +from typing import Dict, List, NamedTuple, IO import logging from dateutil.parser import parse from lxml import objectify @@ -12,9 +12,6 @@ BallState, DatasetFlag, Orientation, - PassEvent, - ShotEvent, - GenericEvent, PassResult, ShotResult, EventType, @@ -29,12 +26,7 @@ BodyPartQualifier, BodyPart, Qualifier, - BallOutEvent, - RecoveryEvent, - SubstitutionEvent, - CardEvent, CardType, - FoulCommittedEvent, AttackingDirection, ) from kloppy.exceptions import DeserializationError @@ -74,6 +66,117 @@ def _team_from_xml_elm(team_elm) -> Team: return team +SPORTEC_FPS = 25 + +"""Sportec uses fixed 
starting frame ids for each half""" +SPORTEC_FIRST_HALF_STARTING_FRAME_ID = 10_000 +SPORTEC_SECOND_HALF_STARTING_FRAME_ID = 100_000 +SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID = 200_000 +SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID = 250_000 + + +class SportecMetadata(NamedTuple): + score: Score + teams: List[Team] + periods: List[Period] + x_max: float + y_max: float + fps: int + + +def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: + """ + Load metadata from Sportec XML element. This part is shared between event- and tracking data. + In the future this might move to a common.sportec package that provides functionality for both + deserializers. + """ + x_max = float(match_root.MatchInformation.Environment.attrib["PitchX"]) + y_max = float(match_root.MatchInformation.Environment.attrib["PitchY"]) + + team_path = objectify.ObjectPath("PutDataRequest.MatchInformation.Teams") + team_elms = list(team_path.find(match_root).iterchildren("Team")) + + home_team = away_team = None + for team_elm in team_elms: + if team_elm.attrib["Role"] == "home": + home_team = _team_from_xml_elm(team_elm) + elif team_elm.attrib["Role"] == "guest": + away_team = _team_from_xml_elm(team_elm) + else: + raise DeserializationError( + f"Unknown side: {team_elm.attrib['Role']}" + ) + + if not home_team: + raise DeserializationError("Home team is missing from metadata") + if not away_team: + raise DeserializationError("Away team is missing from metadata") + + (home_score, away_score,) = match_root.MatchInformation.General.attrib[ + "Result" + ].split(":") + score = Score(home=int(home_score), away=int(away_score)) + teams = [home_team, away_team] + + if len(home_team.players) == 0 or len(away_team.players) == 0: + raise DeserializationError("LineUp incomplete") + + # The periods can be rebuild from event data. Therefore, the periods attribute + # from the metadata can be ignored. It is required for tracking data. 
+ other_game_information = ( + match_root.MatchInformation.OtherGameInformation.attrib + ) + periods = [ + Period( + id=1, + start_timestamp=SPORTEC_FIRST_HALF_STARTING_FRAME_ID / SPORTEC_FPS, + end_timestamp=SPORTEC_FIRST_HALF_STARTING_FRAME_ID / SPORTEC_FPS + + float(other_game_information["TotalTimeFirstHalf"]) / 1000, + ), + Period( + id=2, + start_timestamp=SPORTEC_SECOND_HALF_STARTING_FRAME_ID + / SPORTEC_FPS, + end_timestamp=SPORTEC_SECOND_HALF_STARTING_FRAME_ID / SPORTEC_FPS + + float(other_game_information["TotalTimeSecondHalf"]) / 1000, + ), + ] + + if "TotalTimeFirstHalfExtra" in other_game_information: + # Add two periods for extra time. + periods.extend( + [ + Period( + id=3, + start_timestamp=SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS, + end_timestamp=SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS + + float(other_game_information["TotalTimeFirstHalfExtra"]) + / 1000, + ), + Period( + id=4, + start_timestamp=SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS, + end_timestamp=SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS + + float(other_game_information["TotalTimeSecondHalfExtra"]) + / 1000, + ), + ] + ) + + return SportecMetadata( + score=score, + teams=teams, + periods=periods, + x_max=x_max, + y_max=y_max, + fps=SPORTEC_FPS, + ) + + def _event_chain_from_xml_elm(event_elm): chain = OrderedDict() current_elm = event_elm @@ -259,55 +362,29 @@ def _parse_coordinates(event_attributes: Dict) -> Point: ) -class SportecInputs(NamedTuple): +class SportecEventDataInputs(NamedTuple): meta_data: IO[bytes] event_data: IO[bytes] -class SportecEventDeserializer(EventDataDeserializer[SportecInputs]): +class SportecEventDataDeserializer( + EventDataDeserializer[SportecEventDataInputs] +): @property def provider(self) -> Provider: return Provider.SPORTEC - def deserialize(self, inputs: SportecInputs) -> EventDataset: + def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: with 
performance_logging("load data", logger=logger): match_root = objectify.fromstring(inputs.meta_data.read()) event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): - x_max = float( - match_root.MatchInformation.Environment.attrib["PitchX"] + sportec_metadata = sportec_metadata_from_xml_elm(match_root) + teams = home_team, away_team = sportec_metadata.teams + transformer = self.get_transformer( + length=sportec_metadata.x_max, width=sportec_metadata.y_max ) - y_max = float( - match_root.MatchInformation.Environment.attrib["PitchY"] - ) - - transformer = self.get_transformer(length=x_max, width=y_max) - - team_path = objectify.ObjectPath( - "PutDataRequest.MatchInformation.Teams" - ) - team_elms = list(team_path.find(match_root).iterchildren("Team")) - - for team_elm in team_elms: - if team_elm.attrib["Role"] == "home": - home_team = _team_from_xml_elm(team_elm) - elif team_elm.attrib["Role"] == "guest": - away_team = _team_from_xml_elm(team_elm) - else: - raise DeserializationError( - f"Unknown side: {team_elm.attrib['Role']}" - ) - - ( - home_score, - away_score, - ) = match_root.MatchInformation.General.attrib["Result"].split(":") - score = Score(home=int(home_score), away=int(away_score)) - teams = [home_team, away_team] - - if len(home_team.players) == 0 or len(away_team.players) == 0: - raise DeserializationError("LineUp incomplete") periods = [] period_id = 0 @@ -518,7 +595,7 @@ def deserialize(self, inputs: SportecInputs) -> EventDataset: teams=teams, periods=periods, pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, - score=score, + score=sportec_metadata.score, frame_rate=None, orientation=orientation, flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM), diff --git a/kloppy/infra/serializers/tracking/deserializer.py b/kloppy/infra/serializers/tracking/deserializer.py index cc370cba..0635566c 100644 --- a/kloppy/infra/serializers/tracking/deserializer.py +++ 
b/kloppy/infra/serializers/tracking/deserializer.py @@ -4,8 +4,9 @@ from kloppy.domain import ( Provider, TrackingDataset, - build_coordinate_system, DatasetTransformer, + DatasetTransformerBuilder, + DatasetType, ) T = TypeVar("T") @@ -26,32 +27,16 @@ def __init__( sample_rate = 1.0 self.sample_rate = sample_rate - if not coordinate_system: - coordinate_system = Provider.KLOPPY - - if isinstance(coordinate_system, str): - coordinate_system = Provider[coordinate_system.upper()] - - self.coordinate_system = coordinate_system + self.transformer_builder = DatasetTransformerBuilder(coordinate_system) def get_transformer( self, length: float, width: float, provider: Optional[Provider] = None ) -> DatasetTransformer: - from_coordinate_system = build_coordinate_system( - provider or self.provider, - length=length, - width=width, - ) - - to_coordinate_system = build_coordinate_system( - self.coordinate_system, + return self.transformer_builder.build( length=length, width=width, - ) - - return DatasetTransformer( - from_coordinate_system=from_coordinate_system, - to_coordinate_system=to_coordinate_system, + provider=provider or self.provider, + dataset_type=DatasetType.TRACKING, ) @property diff --git a/kloppy/infra/serializers/tracking/sportec/__init__.py b/kloppy/infra/serializers/tracking/sportec/__init__.py new file mode 100644 index 00000000..245f9471 --- /dev/null +++ b/kloppy/infra/serializers/tracking/sportec/__init__.py @@ -0,0 +1,4 @@ +from .deserializer import ( + SportecTrackingDataDeserializer, + SportecTrackingDataInputs, +) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py new file mode 100644 index 00000000..45b05b1f --- /dev/null +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -0,0 +1,232 @@ +import logging +from collections import defaultdict +from typing import NamedTuple, Optional, Union, IO + +from lxml import objectify + +from kloppy.domain import ( + 
TrackingDataset, + DatasetFlag, + AttackingDirection, + Frame, + Point, + Point3D, + BallState, + Period, + Orientation, + attacking_direction_from_frame, + Metadata, + Provider, + PlayerData, +) + +from kloppy.utils import performance_logging + +from ..deserializer import TrackingDataDeserializer +from kloppy.infra.serializers.event.sportec.deserializer import ( + sportec_metadata_from_xml_elm, +) + +logger = logging.getLogger(__name__) + +PERIOD_ID_TO_GAME_SECTION = { + 1: "firstHalf", + 2: "secondHalf", + 3: "firstHalfExtra", + 4: "secondHalfExtra", +} + + +def _read_section_data(data_root, period: Period) -> dict: + """ + Read all data for a single period from data_root. + + Output format: + { + 10_000: { + 'ball': { + 'N': "10000", + 'X': 20.92, + 'Y': 2.84, + 'Z': 0.08, + 'S': 4.91, + 'BallPossession': "2", + 'BallStatus': "1" + [...] + }, + 'DFL-OBJ-002G3I': { + 'N': "10000", + 'X': "0.35", + 'Y': "-25.26", + 'S': "0.00", + [...] + }, + [....] + }, + 10_001: { + ... + } + } + """ + + game_section = PERIOD_ID_TO_GAME_SECTION[period.id] + frame_sets = data_root.findall( + f"Positions/FrameSet[@GameSection='{game_section}']" + ) + + raw_frames = defaultdict(dict) + for frame_set in frame_sets: + key = ( + "ball" + if frame_set.attrib["TeamId"] == "BALL" + else frame_set.attrib["PersonId"] + ) + for frame in frame_set.iterchildren("Frame"): + attr = frame.attrib + frame_id = int(attr["N"]) + raw_frames[frame_id][key] = attr + + return raw_frames + + +class SportecTrackingDataInputs(NamedTuple): + meta_data: IO[bytes] + raw_data: IO[bytes] + + +class SportecTrackingDataDeserializer(TrackingDataDeserializer): + @property + def provider(self) -> Provider: + return Provider.SPORTEC + + def __init__( + self, + limit: Optional[int] = None, + sample_rate: Optional[float] = None, + coordinate_system: Optional[Union[str, Provider]] = None, + only_alive: Optional[bool] = True, + ): + super().__init__(limit, sample_rate, coordinate_system) + self.only_alive = only_alive + 
+ def deserialize( + self, inputs: SportecTrackingDataInputs + ) -> TrackingDataset: + with performance_logging("load data", logger=logger): + match_root = objectify.fromstring(inputs.meta_data.read()) + data_root = objectify.fromstring(inputs.raw_data.read()) + + with performance_logging("parse metadata", logger=logger): + sportec_metadata = sportec_metadata_from_xml_elm(match_root) + teams = home_team, away_team = sportec_metadata.teams + periods = sportec_metadata.periods + transformer = self.get_transformer( + length=sportec_metadata.x_max, width=sportec_metadata.y_max + ) + + with performance_logging("parse raw data", logger=logger): + + def _iter(): + player_map = {} + for player in home_team.players: + player_map[player.player_id] = player + for player in away_team.players: + player_map[player.player_id] = player + + sample = 1.0 / self.sample_rate + + for period in periods: + raw_frames = _read_section_data(data_root, period) + + # Since python 3.6 dict keep insertion order. Don't need to sort + # on frame ID as it's already sorted. + # Ball FrameSet is always first and contains ALL frame ids. This + # makes sure even with substitutes the data is on order. + for i, (frame_id, frame_data) in enumerate( + sorted(raw_frames.items()) + ): + if "ball" not in frame_data: + # Frames without ball data are corrupt. 
+ continue + + ball_data = frame_data["ball"] + if self.only_alive and ball_data["BallStatus"] != "1": + continue + + if i % sample == 0: + yield Frame( + frame_id=frame_id, + timestamp=( + ( + frame_id + # Do subtraction with integers to prevent floating errors + - period.start_timestamp + * sportec_metadata.fps + ) + / sportec_metadata.fps + ), + ball_owning_team=home_team + if ball_data["BallPossession"] == "1" + else away_team, + ball_state=BallState.ALIVE + if ball_data["BallStatus"] == "1" + else BallState.DEAD, + period=period, + players_data={ + player_map[player_id]: PlayerData( + coordinates=Point( + x=float(raw_player_data["X"]), + y=float(raw_player_data["Y"]), + ), + speed=float(raw_player_data["S"]), + ) + for player_id, raw_player_data in frame_data.items() + if player_id != "ball" + }, + other_data={}, + ball_coordinates=Point3D( + x=float(ball_data["X"]), + y=float(ball_data["Y"]), + z=float(ball_data["Z"]), + ), + ball_speed=float(ball_data["S"]), + ) + + frames = [] + for n, frame in enumerate(_iter()): + frame = transformer.transform_frame(frame) + + frames.append(frame) + + if not frame.period.attacking_direction_set: + frame.period.set_attacking_direction( + attacking_direction=attacking_direction_from_frame( + frame + ) + ) + + if self.limit and n >= self.limit: + break + + orientation = ( + Orientation.FIXED_HOME_AWAY + if periods[0].attacking_direction == AttackingDirection.HOME_AWAY + else Orientation.FIXED_AWAY_HOME + ) + + metadata = Metadata( + teams=teams, + periods=periods, + pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, + score=sportec_metadata.score, + frame_rate=sportec_metadata.fps, + orientation=orientation, + provider=Provider.SPORTEC, + flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, + coordinate_system=transformer.get_to_coordinate_system(), + ) + + return TrackingDataset( + records=frames, + metadata=metadata, + ) diff --git a/kloppy/sportec.py b/kloppy/sportec.py index 
27cf35f6..79595791 100644 --- a/kloppy/sportec.py +++ b/kloppy/sportec.py @@ -1 +1 @@ -from ._providers.sportec import load +from ._providers.sportec import load, load_event, load_tracking diff --git a/kloppy/tests/files/sportec_positional.xml b/kloppy/tests/files/sportec_positional.xml new file mode 100644 index 00000000..8e85056d --- /dev/null +++ b/kloppy/tests/files/sportec_positional.xmldiff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index d320b6fa..c687ed2a 100644 --- a/kloppy/tests/test_helpers.py +++ b/kloppy/tests/test_helpers.py @@ -296,6 +296,7 @@ def test_to_pandas(self): "ball_x": {0: 100, 1: 0}, "ball_y": {0: -50, 1: 50}, "ball_z": {0: 0, 1: 1}, + "ball_speed": {0: None, 1: None}, "home_1_x": {0: None, 1: 15.0}, "home_1_y": {0: None, 1: 35.0}, "home_1_d": {0: None, 1: 0.03}, @@ -348,6 +349,7 @@ def test_to_pandas_additional_columns(self): "ball_x": [100, 0], "ball_y": [-50, 50], "ball_z": [0, 1], + "ball_speed": [None, None], "match": ["test", "test"], "bonus_column": [11, 12], "home_1_x": [None, 15], diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index c33b789e..95951cf7 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -11,12 +11,14 @@ SetPieceType, BodyPart, DatasetType, + BallState, + Point3D, ) from kloppy import sportec -class TestSportecEvent: +class TestSportecEventData: """""" @pytest.fixture @@ -27,9 +29,13 @@ def event_data(self, base_dir) -> str: def meta_data(self, base_dir) -> str: return base_dir / "files/sportec_meta.xml" - def test_correct_deserialization(self, event_data: Path, meta_data: Path): - dataset = sportec.load( - event_data=event_data, meta_data=meta_data, coordinates="sportec" + def test_correct_event_data_deserialization( + self, event_data: Path, meta_data: Path + ): + dataset = sportec.load_event( + event_data=event_data, + meta_data=meta_data, + coordinates="sportec", ) assert dataset.metadata.provider == Provider.SPORTEC @@ -69,9 +75,92 
class TestSportecTrackingData:
    """
    Tests for loading Sportec tracking data.
    """

    @pytest.fixture
    def raw_data(self, base_dir) -> Path:
        """Return the path of the Sportec positional XML test file."""
        return base_dir / "files/sportec_positional.xml"

    @pytest.fixture
    def meta_data(self, base_dir) -> Path:
        """Return the path of the Sportec metadata XML test file."""
        return base_dir / "files/sportec_meta.xml"

    def test_load_metadata(self, raw_data: Path, meta_data: Path):
        """Check provider, dataset type and number of periods."""
        dataset = sportec.load_tracking(
            raw_data=raw_data, meta_data=meta_data, coordinates="sportec"
        )

        assert dataset.metadata.provider == Provider.SPORTEC
        assert dataset.dataset_type == DatasetType.TRACKING
        assert len(dataset.metadata.periods) == 2

    def test_load_frames(self, raw_data: Path, meta_data: Path):
        """Check frame contents when dead-ball frames are kept."""
        dataset = sportec.load_tracking(
            raw_data=raw_data,
            meta_data=meta_data,
            coordinates="sportec",
            only_alive=False,
        )
        home_team, away_team = dataset.metadata.teams

        # First frame
        assert dataset.frames[0].timestamp == 0.0
        assert dataset.frames[0].ball_owning_team == away_team
        assert dataset.frames[0].ball_state == BallState.DEAD
        assert dataset.frames[0].ball_coordinates == Point3D(
            x=2.69, y=0.26, z=0.06
        )

        # Second frame
        assert dataset.frames[1].ball_speed == 65.59
        assert dataset.frames[1].ball_owning_team == home_team
        assert dataset.frames[1].ball_state == BallState.ALIVE

        player_lilian = away_team.get_player_by_id("DFL-OBJ-002G3I")
        player_data = dataset.frames[0].players_data[player_lilian]

        assert player_data.coordinates == Point(x=0.35, y=-25.26)

        # We don't load distance right now as it doesn't
        # work together with `sample_rate`: "The distance covered from
        # the previous frame in cm"
        assert player_data.distance is None

        # Appears first in 27th frame
        player_bensebaini = away_team.get_player_by_id("DFL-OBJ-002G5S")
        assert player_bensebaini not in dataset.frames[0].players_data
        assert player_bensebaini in dataset.frames[26].players_data

        # Contains all 3 players
        assert len(dataset.frames[35].players_data) == 3
        assert len(dataset) == 202

        # Only the first frame of the second period is inspected; a missing
        # second period fails with an explicit message instead of a bare
        # `assert False`.
        second_period = dataset.metadata.periods[1]
        first_second_half_frame = next(
            (frame for frame in dataset if frame.period == second_period),
            None,
        )
        assert (
            first_second_half_frame is not None
        ), "No data found in second half"
        assert (
            first_second_half_frame.timestamp == 0
        ), "First frame must start at timestamp 0.0"

    def test_load_only_alive_frames(self, raw_data: Path, meta_data: Path):
        """Check the frame count when loading with `only_alive=True`."""
        dataset = sportec.load_tracking(
            raw_data=raw_data,
            meta_data=meta_data,
            coordinates="sportec",
            only_alive=True,
        )
        assert len(dataset) == 199