From fc2834115a3eb4be55eeb96d429bbfb1a82f0050 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Thu, 23 Jul 2020 14:19:31 +0200 Subject: [PATCH] Fix to_pandas for opta event data --- CHANGES.txt | 3 +- kloppy/domain/models/event.py | 11 +++ kloppy/helpers.py | 2 +- .../serializers/event/opta/serializer.py | 84 ++++++++++++++++++- .../serializers/event/statsbomb/serializer.py | 2 +- kloppy/tests/test_helpers.py | 18 +++- setup.py | 2 +- 7 files changed, 116 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 0bc81c68..a23a461a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -27,4 +27,5 @@ v0.6.0, 2020-06-18 -- Add Opta event serializer Fix for event pattern matching when multiple paths can match Improved ball_recovery example v0.6.1, 2020-07-02 -- Fix in readme (@rjtavares) - Add additional_columns to to_pandas (@rjtavares) \ No newline at end of file + Add additional_columns to to_pandas (@rjtavares) +v0.6.2, 2020-07-23 -- Fix to_pandas for Opta event data \ No newline at end of file diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 1d39f6da..26ce30fa 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -82,9 +82,15 @@ class Event(DataRecord, ABC): def event_type(self) -> EventType: raise NotImplementedError + @property + @abstractmethod + def event_name(self) -> str: + raise NotImplementedError + @dataclass class GenericEvent(Event): + event_name: str = "generic" event_type: EventType = EventType.GENERIC @@ -93,6 +99,7 @@ class ShotEvent(Event): result: ShotResult event_type: EventType = EventType.SHOT + event_name: str = "shot" @dataclass @@ -104,6 +111,8 @@ class PassEvent(Event): result: PassResult event_type: EventType = EventType.PASS + event_name: str = "pass" + @dataclass @@ -111,6 +120,7 @@ class TakeOnEvent(Event): result: TakeOnResult event_type: EventType = EventType.TAKE_ON + event_name: str = "take-on" @dataclass @@ -121,6 +131,7 @@ class CarryEvent(Event): result: 
CarryResult event_type: EventType = EventType.CARRY + event_name: str = "carry" @dataclass diff --git a/kloppy/helpers.py b/kloppy/helpers.py index cda061c7..43fce11e 100644 --- a/kloppy/helpers.py +++ b/kloppy/helpers.py @@ -156,7 +156,7 @@ def _event_to_pandas_row_converter(event: Event) -> Dict: event_type=( event.event_type.value if event.event_type != EventType.GENERIC - else f"GENERIC:{event.raw_event['type']['name']}" + else f"GENERIC:{event.event_name}" ), result=event.result.value if event.result else None, success=event.result.is_success if event.result else None, diff --git a/kloppy/infra/serializers/event/opta/serializer.py b/kloppy/infra/serializers/event/opta/serializer.py index 3684ca3d..38194afc 100644 --- a/kloppy/infra/serializers/event/opta/serializer.py +++ b/kloppy/infra/serializers/event/opta/serializer.py @@ -93,6 +93,88 @@ def _parse_shot( EVENT_TYPE_SHOT_SAVED = 15 EVENT_TYPE_SHOT_GOAL = 16 +event_type_names = {1: 'pass', + 2: 'offside pass', + 3: 'take on', + 4: 'foul', + 5: 'out', + 6: 'corner awarded', + 7: 'tackle', + 8: 'interception', + 9: 'turnover', + 10: 'save', + 11: 'claim', + 12: 'clearance', + 13: 'miss', + 14: 'post', + 15: 'attempt saved', + 16: 'goal', + 17: 'card', + 18: 'player off', + 19: 'player on', + 20: 'player retired', + 21: 'player returns', + 22: 'player becomes goalkeeper', + 23: 'goalkeeper becomes player', + 24: 'condition change', + 25: 'official change', + 26: 'unknown26', + 27: 'start delay', + 28: 'end delay', + 29: 'unknown29', + 30: 'end', + 31: 'unknown31', + 32: 'start', + 33: 'unknown33', + 34: 'team set up', + 35: 'player changed position', + 36: 'player changed jersey number', + 37: 'collection end', + 38: 'temp_goal', + 39: 'temp_attempt', + 40: 'formation change', + 41: 'punch', + 42: 'good skill', + 43: 'deleted event', + 44: 'aerial', + 45: 'challenge', + 46: 'unknown46', + 47: 'rescinded card', + 48: 'unknown48', + 49: 'ball recovery', + 50: 'dispossessed', + 51: 'error', + 52: 'keeper pick-up',
+ 53: 'cross not claimed', + 54: 'smother', + 55: 'offside provoked', + 56: 'shield ball opp', + 57: 'foul throw in', + 58: 'penalty faced', + 59: 'keeper sweeper', + 60: 'chance missed', + 61: 'ball touch', + 62: 'unknown62', + 63: 'temp_save', + 64: 'resume', + 65: 'contentious referee decision', + 66: 'possession data', + 67: '50/50', + 68: 'referee drop ball', + 69: 'failed to block', + 70: 'injury time announcement', + 71: 'coach setup', + 72: 'caught offside', + 73: 'other ball contact', + 74: 'blocked pass', + 75: 'delayed start', + 76: 'early end', + 77: 'player off pitch'} + + +def _get_event_type_name(type_id: int) -> str: + return event_type_names.get(type_id, 'unknown') + class OptaSerializer(EventDataSerializer): @staticmethod @@ -303,7 +385,7 @@ def deserialize( event = ShotEvent(**kwargs) else: event = GenericEvent( - **generic_event_kwargs, result=None + **generic_event_kwargs, result=None, event_name=_get_event_type_name(type_id) ) if ( diff --git a/kloppy/infra/serializers/event/statsbomb/serializer.py b/kloppy/infra/serializers/event/statsbomb/serializer.py index a41fc52e..bba2ebe8 100644 --- a/kloppy/infra/serializers/event/statsbomb/serializer.py +++ b/kloppy/infra/serializers/event/statsbomb/serializer.py @@ -392,7 +392,7 @@ def deserialize( **generic_event_kwargs, ) else: - event = GenericEvent(result=None, **generic_event_kwargs) + event = GenericEvent(result=None, event_name=raw_event['type']['name'], **generic_event_kwargs) if ( not wanted_event_types diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index 2252e6b8..46a098bd 100644 --- a/kloppy/tests/test_helpers.py +++ b/kloppy/tests/test_helpers.py @@ -7,7 +7,7 @@ to_pandas, load_metrica_tracking_data, load_tracab_tracking_data, - transform, + transform, OptaSerializer, ) from kloppy.domain import ( Period, @@ -126,6 +126,22 @@ def test_to_pandas(self): assert_frame_equal(data_frame, expected_data_frame) + def test_to_pandas_generic_events(self): + base_dir = 
os.path.dirname(__file__) + + serializer = OptaSerializer() + + with open(f"{base_dir}/files/opta_f24.xml", "rb") as f24_data, open( + f"{base_dir}/files/opta_f7.xml", "rb" + ) as f7_data: + dataset = serializer.deserialize( + inputs={"f24_data": f24_data, "f7_data": f7_data} + ) + + dataframe = to_pandas(dataset) + dataframe = dataframe[dataframe.event_type == "GENERIC:out"] + assert dataframe.shape[0] == 2 + def test_to_pandas_additional_columns(self): tracking_data = self._get_tracking_dataset() diff --git a/setup.py b/setup.py index fe75d914..18377f2e 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="kloppy", - version="0.6.1", + version="0.6.2", author="Koen Vossen", author_email="info@koenvossen.nl", url="https://github.com/PySport/kloppy",