Skip to content

Commit

Permalink
Merge pull request #71 from koenvo/sportec
Browse files Browse the repository at this point in the history
Sportec serializer
  • Loading branch information
koenvo authored Nov 19, 2020
2 parents 14c3b8e + 6f7ef62 commit 39914b5
Show file tree
Hide file tree
Showing 13 changed files with 843 additions and 17 deletions.
26 changes: 25 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ from kloppy import (
load_epts_tracking_data,
load_statsbomb_event_data,
load_opta_event_data,
load_sportec_event_data,
to_pandas,
transform
)
Expand All @@ -89,7 +90,8 @@ dataset = load_statsbomb_event_data('event_data.json', 'lineup.json')
dataset = load_opta_event_data('f24_data.xml', 'f7_data.xml')
# metrica json
dataset = load_metrica_json_event_data('raw_data.json', 'meta.xml')

# sportec xml
dataset = load_sportec_event_data('events.xml', 'match_data.xml')

dataset = transform(dataset, to_pitch_dimensions=[[0, 108], [-34, 34]])
pandas_data_frame = to_pandas(dataset)
Expand Down Expand Up @@ -242,6 +244,28 @@ with open("eventdata.json", "rb") as event_data, \
```


or Sportec XML event data
```python
from kloppy import SportecEventSerializer

serializer = SportecEventSerializer()

with open("eventdata.xml", "rb") as event_data, \
open("match_data.xml", "rb") as match_data:

dataset = serializer.deserialize(
inputs={
'event_data': event_data,
'match_data': match_data
},
options={
"event_types": ["pass", "shot"]
}
)

# start working with dataset
```


### <a name="pitch-dimensions"></a>Transform the pitch dimensions
Data providers use their own pitch dimensions. Some use actual meters while others use 100x100. Use the Transformer to convert from one set of pitch dimensions to another.
Expand Down
4 changes: 4 additions & 0 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Provider(Enum):
TRACAB = "tracab"
OPTA = "opta"
STATSBOMB = "statsbomb"
SPORTEC = "sportec"

def __str__(self):
return self.value
Expand All @@ -37,6 +38,9 @@ class Position:
name: str
coordinates: Point

def __str__(self):
return self.name


@dataclass(frozen=True)
class Player:
Expand Down
11 changes: 10 additions & 1 deletion kloppy/domain/models/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import List, Union, Dict
from typing import List, Union, Dict, Type

from kloppy.domain.models.common import DatasetType
from kloppy.utils import camelcase_to_snakecase, removes_suffix
Expand Down Expand Up @@ -84,6 +84,8 @@ class EventType(Enum):

@dataclass
class Qualifier(ABC):
value: None

@abstractmethod
def to_dict(self):
pass
Expand Down Expand Up @@ -152,6 +154,13 @@ def event_name(self) -> str:
def create(cls, **kwargs):
return cls(**kwargs, state={})

def get_qualifier_value(self, qualifier_type: Type[Qualifier]):
    """
    Return the value of the first qualifier of the requested type.

    Arguments:
        qualifier_type: Qualifier class to look for. Matching uses
            ``isinstance``, so instances of subclasses match as well.

    Returns:
        The ``value`` attribute of the first matching entry in
        ``self.qualifiers``, or ``None`` when ``self.qualifiers`` is
        unset/empty or contains no qualifier of that type.
    """
    # `qualifiers` may be None or empty; both mean "nothing to search".
    for qualifier in self.qualifiers or ():
        if isinstance(qualifier, qualifier_type):
            return qualifier.value
    return None


@dataclass
class GenericEvent(Event):
Expand Down
5 changes: 0 additions & 5 deletions kloppy/domain/services/state_builder/builders/lineup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@ class Lineup:

class LineupStateBuilder(StateBuilder):
def initial_state(self, dataset: EventDataset) -> Lineup:
if dataset.metadata.provider != Provider.STATSBOMB:
raise Exception(
"Lineup state can only be applied to statsbomb data"
)

return Lineup(
players=(
set(
Expand Down
3 changes: 2 additions & 1 deletion kloppy/domain/services/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ def transform_event(self, event: EventType) -> EventType:
position_changes = {
field.name: self.transform_point(getattr(event, field.name), flip)
for field in fields(event)
if field.name.endswith("position") and getattr(event, field.name)
if field.name.endswith("coordinates")
and getattr(event, field.name)
}

return replace(event, **position_changes)
Expand Down
40 changes: 33 additions & 7 deletions kloppy/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
MetricaEventsJsonSerializer,
MetricaTrackingSerializer,
OptaSerializer,
SportecEventSerializer,
StatsBombSerializer,
TRACABSerializer,
)
Expand Down Expand Up @@ -115,6 +116,20 @@ def load_metrica_json_event_data(
)


def load_sportec_event_data(
    event_data_filename: str, match_data_filename: str, options: dict = None
) -> EventDataset:
    """
    Load a Sportec event dataset from an event file and a match-data file.

    Arguments:
        event_data_filename: path of the event data file; opened in
            binary mode.
        match_data_filename: path of the match data file; opened in
            binary mode.
        options: passed through unchanged to
            ``SportecEventSerializer.deserialize`` (``None`` by default).

    Returns:
        The result of ``SportecEventSerializer.deserialize`` for the two
        opened files.
    """
    serializer = SportecEventSerializer()
    # Both files stay open only for the duration of deserialization.
    with open(event_data_filename, "rb") as event_data, open(
        match_data_filename, "rb"
    ) as match_data:
        return serializer.deserialize(
            inputs={"event_data": event_data, "match_data": match_data},
            options=options,
        )


DatasetT = TypeVar("DatasetT")


Expand Down Expand Up @@ -177,7 +192,7 @@ def _event_to_pandas_row_converter(event: Event) -> Dict:
ball_owning_team=event.ball_owning_team.team_id
if event.ball_owning_team
else None,
team_id=event.team.team_id,
team_id=event.team.team_id if event.team else None,
player_id=event.player.player_id if event.player else None,
coordinates_x=event.coordinates.x if event.coordinates else None,
coordinates_y=event.coordinates.y if event.coordinates else None,
Expand All @@ -186,8 +201,12 @@ def _event_to_pandas_row_converter(event: Event) -> Dict:
row.update(
{
"end_timestamp": event.receive_timestamp,
"end_coordinates_x": event.receiver_coordinates.x,
"end_coordinates_y": event.receiver_coordinates.y,
"end_coordinates_x": event.receiver_coordinates.x
if event.receiver_coordinates
else None,
"end_coordinates_y": event.receiver_coordinates.y
if event.receiver_coordinates
else None,
"receiver_player_id": event.receiver_player.player_id
if event.receiver_player
else None,
Expand All @@ -197,8 +216,12 @@ def _event_to_pandas_row_converter(event: Event) -> Dict:
row.update(
{
"end_timestamp": event.end_timestamp,
"end_coordinates_x": event.end_coordinates.x,
"end_coordinates_y": event.end_coordinates.y,
"end_coordinates_x": event.end_coordinates.x
if event.end_coordinates
else None,
"end_coordinates_y": event.end_coordinates.y
if event.end_coordinates
else None,
}
)

Expand All @@ -225,13 +248,15 @@ def to_pandas(
if isinstance(dataset, Dataset):
records = dataset.records
elif isinstance(dataset, list):

records = dataset
else:
raise Exception("Unknown dataset type")

if not records:
return pd.DataFrame()

if not _record_converter:
if isinstance(dataset, TrackingDataset) or isinstance(
if isinstance(dataset, TrackingDataset) and isinstance(
records[0], Frame
):
_record_converter = _frame_to_pandas_row_converter
Expand Down Expand Up @@ -264,6 +289,7 @@ def generic_record_converter(record: Union[Frame, Event]):
"load_epts_tracking_data",
"load_statsbomb_event_data",
"load_opta_event_data",
"load_sportec_event_data",
"to_pandas",
"transform",
]
3 changes: 1 addition & 2 deletions kloppy/infra/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
StatsBombSerializer,
OptaSerializer,
MetricaEventsJsonSerializer,
SportecEventSerializer,
)

# NOT YET: from .event import EventDataSerializer, MetricaEventSerializer
1 change: 1 addition & 0 deletions kloppy/infra/serializers/event/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .statsbomb import StatsBombSerializer
from .opta import OptaSerializer
from .metrica import MetricaEventsJsonSerializer
from .sportec import SportecEventSerializer
1 change: 1 addition & 0 deletions kloppy/infra/serializers/event/sportec/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .serializer import SportecEventSerializer
Loading

0 comments on commit 39914b5

Please sign in to comment.