From 51653e7d30420721d352b87099d9654355f13a30 Mon Sep 17 00:00:00 2001 From: Bruno Date: Thu, 22 Oct 2020 11:26:08 -0300 Subject: [PATCH 1/8] Added improved standard events to Metrica serializer As part of the larger issue of adding more standard events to be able to better match the data format and event types between providers, I have added in this commit the following standard events: - GOAL KICK - THROW IN - FREE KICK - CORNER KICK - KICK OFF - PENALTY - RECOVERY - FOUL COMMITTED - BALL OUT In the case of Metrica data, BALL OUT is a synthetic event that's added to the dataset. All the other ones replace the original event; for example, a pass from a free kick was previously of event_type PASS, and now it would be of event_type FREE KICK. I just realized shots from penalties or free kicks are not correctly handled; I will fix that in a next commit. The same changes will also be implemented in the other providers. --- kloppy/cmdline.py | 3 +- kloppy/domain/models/event.py | 30 ++++ .../domain/services/transformers/__init__.py | 30 ++-- kloppy/helpers.py | 3 +- .../event/metrica/json_serializer.py | 157 +++++++++++++----- .../serializers/event/opta/serializer.py | 26 +-- .../serializers/event/statsbomb/serializer.py | 5 +- .../serializers/tracking/epts/metadata.py | 3 +- kloppy/infra/serializers/tracking/tracab.py | 5 +- kloppy/tests/test_metrica.py | 6 +- 10 files changed, 176 insertions(+), 92 deletions(-) diff --git a/kloppy/cmdline.py b/kloppy/cmdline.py index 2e53680f..343d8518 100644 --- a/kloppy/cmdline.py +++ b/kloppy/cmdline.py @@ -80,8 +80,7 @@ def run_query(argv=sys.argv[1:]): help="StatsBomb event input files (events.json,lineup.json)", ) parser.add_argument( - "--input-opta", - help="Opta event input files (f24.xml,f7.xml)", + "--input-opta", help="Opta event input files (f24.xml,f7.xml)", ) parser.add_argument("--output-xml", help="Output file") parser.add_argument( diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 1c4285af..e21da7c7 
100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -76,6 +76,15 @@ class EventType(Enum): CARD = "CARD" PLAYER_ON = "PLAYER_ON" PLAYER_OFF = "PLAYER_OFF" + RECOVERY = "RECOVERY" + BALL_OUT = "BALL_OUT" + FOUL_COMMITTED = "FOUL_COMMITTED" + GOAL_KICK = "GOAL_KICK" + FREE_KICK = "FREE_KICK" + THROW_IN = "THROW_IN" + CORNER_KICK = "CORNER_KICK" + PENALTY = "PENALTY" + KICK_OFF = "KICK_OFF" @dataclass @@ -178,6 +187,24 @@ class CardEvent(Event): event_name: str = "card" +@dataclass +class RecoveryEvent(Event): + event_type: EventType = EventType.RECOVERY + event_name: str = "recovery" + + +@dataclass +class BallOutEvent(Event): + event_type: EventType = EventType.BALL_OUT + event_name: str = "ball_out" + + +@dataclass +class FoulCommittedEvent(Event): + event_type: EventType = EventType.FOUL_COMMITTED + event_name: str = "foul_committed" + + @dataclass class EventDataset(Dataset): records: List[ @@ -225,4 +252,7 @@ def add_state(self, *args, **kwargs): "CardEvent", "CardType", "EventDataset", + "RecoveryEvent", + "FoulCommittedEvent", + "BallOutEvent", ] diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py index 425ee1c2..4b95c8b2 100644 --- a/kloppy/domain/services/transformers/__init__.py +++ b/kloppy/domain/services/transformers/__init__.py @@ -56,19 +56,15 @@ def __needs_flip( if self._from_orientation == self._to_orientation: flip = False else: - orientation_factor_from = ( - self._from_orientation.get_orientation_factor( - ball_owning_team=ball_owning_team, - attacking_direction=attacking_direction, - action_executing_team=action_executing_team, - ) + orientation_factor_from = self._from_orientation.get_orientation_factor( + ball_owning_team=ball_owning_team, + attacking_direction=attacking_direction, + action_executing_team=action_executing_team, ) - orientation_factor_to = ( - self._to_orientation.get_orientation_factor( - ball_owning_team=ball_owning_team, - 
attacking_direction=attacking_direction, - action_executing_team=action_executing_team, - ) + orientation_factor_to = self._to_orientation.get_orientation_factor( + ball_owning_team=ball_owning_team, + attacking_direction=attacking_direction, + action_executing_team=action_executing_team, ) flip = orientation_factor_from != orientation_factor_to return flip @@ -153,16 +149,10 @@ def transform_dataset( for record in dataset.records ] - return TrackingDataset( - metadata=metadata, - records=frames, - ) + return TrackingDataset(metadata=metadata, records=frames,) elif isinstance(dataset, EventDataset): events = list(map(transformer.transform_event, dataset.records)) - return EventDataset( - metadata=metadata, - records=events, - ) + return EventDataset(metadata=metadata, records=events,) else: raise Exception("Unknown Dataset type") diff --git a/kloppy/helpers.py b/kloppy/helpers.py index 725313eb..3e80eb70 100644 --- a/kloppy/helpers.py +++ b/kloppy/helpers.py @@ -97,8 +97,7 @@ def load_opta_event_data( ) as f7_data: return serializer.deserialize( - inputs={"f24_data": f24_data, "f7_data": f7_data}, - options=options, + inputs={"f24_data": f24_data, "f7_data": f7_data}, options=options, ) diff --git a/kloppy/infra/serializers/event/metrica/json_serializer.py b/kloppy/infra/serializers/event/metrica/json_serializer.py index a79f3ff3..c1d745e1 100644 --- a/kloppy/infra/serializers/event/metrica/json_serializer.py +++ b/kloppy/infra/serializers/event/metrica/json_serializer.py @@ -12,6 +12,9 @@ ShotEvent, TakeOnEvent, CarryEvent, + RecoveryEvent, + FoulCommittedEvent, + BallOutEvent, GenericEvent, PassResult, ShotResult, @@ -42,6 +45,16 @@ MS_PASS_OUTCOME_OFFSIDE, ] +# Set Pieces +MS_SET_PIECE = 5 +MS_SET_PIECE_GOAL_KICK = 20 +MS_SET_PIECE_FREE_KICK = 32 +MS_SET_PIECE_THROW_IN = 34 +MS_SET_PIECE_CORNER_KICK = 33 +MS_SET_PIECE_PENALTY = 36 +MS_SET_PIECE_KICK_OFF = 35 + + # Shots MS_EVENT_TYPE_SHOT = 2 MS_SHOT_OUTCOME_BLOCKED = 25 @@ -63,6 +76,8 @@ MS_EVENT_TYPE_DRIBBLE = 
45 MS_EVENT_TYPE_CARRY = 10 MS_EVENT_TYPE_CHALLENGE = 9 +MS_EVENT_TYPE_RECOVERY = 3 +MS_EVENT_TYPE_FOUL_COMMITTED = 4 MS_EVENT_TYPE_CARD = 8 @@ -72,10 +87,7 @@ def _parse_coordinates(event_start_or_end: dict) -> Point: if x is None: return None - return Point( - x=x, - y=y, - ) + return Point(x=x, y=y,) def _parse_subtypes(event: dict) -> List: @@ -88,38 +100,66 @@ def _parse_subtypes(event: dict) -> List: return None -def _parse_pass(event: Dict, subtypes: List, team: Team) -> Dict: +def _parse_pass( + event: Dict, previous_event: Dict, subtypes: List, team: Team +) -> Dict: - pass_type_id = event["type"]["id"] + event_type_id = event["type"]["id"] - if pass_type_id == MS_PASS_OUTCOME_COMPLETE: + if event_type_id == MS_PASS_OUTCOME_COMPLETE: result = PassResult.COMPLETE receiver_player = team.get_player_by_id(event["to"]["id"]) receiver_coordinates = _parse_coordinates(event["end"]) receive_timestamp = event["end"]["time"] else: - if pass_type_id == MS_PASS_OUTCOME_OUT: + if event_type_id == MS_PASS_OUTCOME_OUT: result = PassResult.OUT - elif pass_type_id == MS_PASS_OUTCOME_INCOMPLETE: + elif event_type_id == MS_PASS_OUTCOME_INCOMPLETE: if subtypes and MS_PASS_OUTCOME_OFFSIDE in subtypes: result = PassResult.OFFSIDE else: result = PassResult.INCOMPLETE else: - raise Exception(f"Unknown pass outcome: {pass_type_id}") + raise Exception(f"Unknown pass outcome: {event_type_id}") receiver_player = None receiver_coordinates = None receive_timestamp = None + pass_type = _get_pass_type(event, previous_event, subtypes) + return dict( result=result, receiver_coordinates=receiver_coordinates, receiver_player=receiver_player, receive_timestamp=receive_timestamp, + event_type=pass_type, ) +def _get_pass_type( + event: Dict, previous_event: Dict, subtypes: List +) -> EventType: + + previous_event_type_id = previous_event["type"]["id"] + if previous_event_type_id == MS_SET_PIECE: + set_piece_subtypes = _parse_subtypes(previous_event) + if MS_SET_PIECE_CORNER_KICK in 
set_piece_subtypes: + return EventType.CORNER_KICK + elif MS_SET_PIECE_FREE_KICK in set_piece_subtypes: + return EventType.FREE_KICK + elif MS_SET_PIECE_PENALTY in set_piece_subtypes: + return EventType.PENALTY + elif MS_SET_PIECE_THROW_IN in set_piece_subtypes: + return EventType.THROW_IN + elif MS_SET_PIECE_KICK_OFF in set_piece_subtypes: + return EventType.KICK_OFF + elif subtypes and MS_SET_PIECE_GOAL_KICK in subtypes: + return EventType.GOAL_KICK + else: + return EventType.PASS + + def _parse_shot(event: Dict, subtypes: List) -> Dict: if MS_SHOT_OUTCOME_OFF_TARGET in subtypes: result = ShotResult.OFF_TARGET @@ -236,7 +276,8 @@ def deserialize( ] events = [] - for raw_event in raw_events["data"]: + for i, raw_event in enumerate(raw_events["data"]): + if raw_event["team"]["id"] == metadata.teams[0].team_id: team = metadata.teams[0] elif raw_event["team"]["id"] == metadata.teams[1].team_id: @@ -269,56 +310,98 @@ def deserialize( raw_event=raw_event, ) + iteration_events = [] + if event_type in MS_PASS_TYPES: + previous_event = raw_events["data"][i - 1] pass_event_kwargs = _parse_pass( event=raw_event, + previous_event=previous_event, subtypes=subtypes, team=team, ) - event = PassEvent.create( - **pass_event_kwargs, - **generic_event_kwargs, + iteration_events.append( + PassEvent.create( + **pass_event_kwargs, **generic_event_kwargs, + ) ) elif event_type == MS_EVENT_TYPE_SHOT: shot_event_kwargs = _parse_shot( event=raw_event, subtypes=subtypes ) - event = ShotEvent.create( - **shot_event_kwargs, **generic_event_kwargs + iteration_events.append( + ShotEvent.create( + **shot_event_kwargs, **generic_event_kwargs + ) ) elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes: take_on_event_kwargs = _parse_take_on(subtypes=subtypes) - event = TakeOnEvent.create( - **take_on_event_kwargs, **generic_event_kwargs + iteration_events.append( + TakeOnEvent.create( + **take_on_event_kwargs, **generic_event_kwargs + ) ) elif event_type == MS_EVENT_TYPE_CARRY: - carry_event_kwargs 
= _parse_carry( - event=raw_event, + carry_event_kwargs = _parse_carry(event=raw_event,) + iteration_events.append( + CarryEvent.create( + **carry_event_kwargs, **generic_event_kwargs, + ) ) - event = CarryEvent.create( - **carry_event_kwargs, - **generic_event_kwargs, + elif event_type == MS_EVENT_TYPE_RECOVERY: + iteration_events.append( + RecoveryEvent.create( + result=None, **generic_event_kwargs + ) ) - else: - event = GenericEvent.create( - result=None, - event_name=raw_event["type"]["name"], - **generic_event_kwargs, + elif event_type == MS_EVENT_TYPE_FOUL_COMMITTED: + iteration_events.append( + FoulCommittedEvent.create( + result=None, **generic_event_kwargs + ) + ) + elif event_type != MS_SET_PIECE: + iteration_events.append( + GenericEvent.create( + result=None, + event_name=raw_event["type"]["name"], + **generic_event_kwargs, + ) + ) + + # Checks if the event was a pass that ended out of the field to add a + # synthetic ball_out event. + if event_type == MS_PASS_OUTCOME_OUT: + iteration_events.append( + BallOutEvent.create( + result=None, + # from DataRecord + period=period, + timestamp=raw_event["end"]["time"], + ball_owning_team=_parse_ball_owning_team( + event_type, team + ), + ball_state=BallState.DEAD, + # from Event + event_id=None, + team=team, + player=player, + coordinates=(_parse_coordinates(raw_event["end"])), + raw_event=raw_event, + ) ) - if ( - not wanted_event_types - or event.event_type in wanted_event_types - ): - events.append(event) + for event in iteration_events: + if ( + not wanted_event_types + or event.event_type in wanted_event_types + ): + events.append(event) - return EventDataset( - metadata=metadata, - records=events, - ) + return EventDataset(metadata=metadata, records=events,) def serialize(self, data_set: EventDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/event/opta/serializer.py b/kloppy/infra/serializers/event/opta/serializer.py index 7493a334..4d8f225a 100644 --- 
a/kloppy/infra/serializers/event/opta/serializer.py +++ b/kloppy/infra/serializers/event/opta/serializer.py @@ -348,16 +348,8 @@ def deserialize( game_elm = f24_root.find("Game") periods = [ - Period( - id=1, - start_timestamp=None, - end_timestamp=None, - ), - Period( - id=2, - start_timestamp=None, - end_timestamp=None, - ), + Period(id=1, start_timestamp=None, end_timestamp=None,), + Period(id=2, start_timestamp=None, end_timestamp=None,), ] possession_team = None events = [] @@ -434,20 +426,17 @@ def deserialize( if type_id == EVENT_TYPE_PASS: pass_event_kwargs = _parse_pass(qualifiers, outcome) event = PassEvent.create( - **pass_event_kwargs, - **generic_event_kwargs, + **pass_event_kwargs, **generic_event_kwargs, ) elif type_id == EVENT_TYPE_OFFSIDE_PASS: pass_event_kwargs = _parse_offside_pass() event = PassEvent.create( - **pass_event_kwargs, - **generic_event_kwargs, + **pass_event_kwargs, **generic_event_kwargs, ) elif type_id == EVENT_TYPE_TAKE_ON: take_on_event_kwargs = _parse_take_on(outcome) event = TakeOnEvent.create( - **take_on_event_kwargs, - **generic_event_kwargs, + **take_on_event_kwargs, **generic_event_kwargs, ) elif type_id in ( EVENT_TYPE_SHOT_MISS, @@ -490,10 +479,7 @@ def deserialize( provider=Provider.OPTA, ) - return EventDataset( - metadata=metadata, - records=events, - ) + return EventDataset(metadata=metadata, records=events,) def serialize(self, data_set: EventDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/event/statsbomb/serializer.py b/kloppy/infra/serializers/event/statsbomb/serializer.py index eda4ece0..2c17571b 100644 --- a/kloppy/infra/serializers/event/statsbomb/serializer.py +++ b/kloppy/infra/serializers/event/statsbomb/serializer.py @@ -532,10 +532,7 @@ def deserialize( provider=Provider.STATSBOMB, ) - return EventDataset( - metadata=metadata, - records=events, - ) + return EventDataset(metadata=metadata, records=events,) def serialize(self, data_set: EventDataset) -> 
Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/tracking/epts/metadata.py b/kloppy/infra/serializers/tracking/epts/metadata.py index 6f61ad20..3fa9e8c4 100644 --- a/kloppy/infra/serializers/tracking/epts/metadata.py +++ b/kloppy/infra/serializers/tracking/epts/metadata.py @@ -157,8 +157,7 @@ def _parse_provider(provider_name: Union[str, None]) -> Provider: return Provider.METRICA else: warnings.warn( - "The Provider is not known to Kloppy.", - Warning, + "The Provider is not known to Kloppy.", Warning, ) else: return None diff --git a/kloppy/infra/serializers/tracking/tracab.py b/kloppy/infra/serializers/tracking/tracab.py index 9eeeb96e..53a2d5df 100644 --- a/kloppy/infra/serializers/tracking/tracab.py +++ b/kloppy/infra/serializers/tracking/tracab.py @@ -242,10 +242,7 @@ def _iter(): flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, ) - return TrackingDataset( - records=frames, - metadata=metadata, - ) + return TrackingDataset(records=frames, metadata=metadata,) def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/tests/test_metrica.py b/kloppy/tests/test_metrica.py index 661a9c36..6c52f16f 100644 --- a/kloppy/tests/test_metrica.py +++ b/kloppy/tests/test_metrica.py @@ -7,6 +7,7 @@ AttackingDirection, Orientation, Point, + EventType, ) from kloppy.domain.models.common import DatasetType @@ -91,7 +92,7 @@ def test_correct_deserialization(self): assert dataset.metadata.provider == Provider.METRICA assert dataset.dataset_type == DatasetType.EVENT - assert len(dataset.events) == 3620 + assert len(dataset.events) == 3594 assert len(dataset.metadata.periods) == 2 assert dataset.metadata.orientation is None assert dataset.metadata.teams[0].name == "Team A" @@ -115,3 +116,6 @@ def test_correct_deserialization(self): end_timestamp=5742.12, attacking_direction=AttackingDirection.NOT_SET, ) + + # Make sure we are using the improved event types. 
+ dataset.records[0].event_type == EventType.KICK_OFF From 7beee7d287bb378d5620f52ec0c1c70e96606c18 Mon Sep 17 00:00:00 2001 From: Bruno Date: Tue, 27 Oct 2020 12:59:37 -0300 Subject: [PATCH 2/8] Qualifiers approach to events to code for things like set pieces As discussed on #43, to be able to code for things like whether an event is a set piece, we moved from encoding that in the name of the event to adding qualifiers to the event. For now the qualifiers are only SetPieceType, but could accommodate other things like Foot, or BodyPart for shots, etc. --- kloppy/domain/models/event.py | 6 +++ kloppy/helpers.py | 7 +++ .../event/metrica/json_serializer.py | 54 +++++++++++++------ kloppy/tests/test_metrica.py | 3 +- 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index e21da7c7..6aedd214 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -79,6 +79,9 @@ class EventType(Enum): RECOVERY = "RECOVERY" BALL_OUT = "BALL_OUT" FOUL_COMMITTED = "FOUL_COMMITTED" + + +class SetPieceType(Enum): GOAL_KICK = "GOAL_KICK" FREE_KICK = "FREE_KICK" THROW_IN = "THROW_IN" @@ -99,6 +102,8 @@ class Event(DataRecord, ABC): raw_event: Dict state: Dict[str, any] + qualifiers: List[SetPieceType] + @property @abstractmethod def event_type(self) -> EventType: @@ -255,4 +260,5 @@ def add_state(self, *args, **kwargs): "RecoveryEvent", "FoulCommittedEvent", "BallOutEvent", + "SetPieceType", ] diff --git a/kloppy/helpers.py b/kloppy/helpers.py index 3e80eb70..3dc43743 100644 --- a/kloppy/helpers.py +++ b/kloppy/helpers.py @@ -24,6 +24,7 @@ EventType, Player, DataRecord, + SetPieceType, ) @@ -201,6 +202,12 @@ def _event_to_pandas_row_converter(event: Event) -> Dict: "end_coordinates_y": event.end_coordinates.y, } ) + + if event.qualifiers: + for qualifier in event.qualifiers: + if isinstance(qualifier, SetPieceType): + row.update({f"is_{qualifier.name}": True}) + return row diff --git 
a/kloppy/infra/serializers/event/metrica/json_serializer.py b/kloppy/infra/serializers/event/metrica/json_serializer.py index c1d745e1..4b03dbaf 100644 --- a/kloppy/infra/serializers/event/metrica/json_serializer.py +++ b/kloppy/infra/serializers/event/metrica/json_serializer.py @@ -21,6 +21,7 @@ TakeOnResult, CarryResult, EventType, + SetPieceType, ) from kloppy.infra.serializers.event import EventDataSerializer @@ -126,38 +127,39 @@ def _parse_pass( receiver_coordinates = None receive_timestamp = None - pass_type = _get_pass_type(event, previous_event, subtypes) + qualifiers = _get_pass_qualifiers(event, previous_event, subtypes) return dict( result=result, receiver_coordinates=receiver_coordinates, receiver_player=receiver_player, receive_timestamp=receive_timestamp, - event_type=pass_type, + qualifiers=qualifiers, ) -def _get_pass_type( +def _get_pass_qualifiers( event: Dict, previous_event: Dict, subtypes: List ) -> EventType: previous_event_type_id = previous_event["type"]["id"] + qualifiers = [] if previous_event_type_id == MS_SET_PIECE: set_piece_subtypes = _parse_subtypes(previous_event) if MS_SET_PIECE_CORNER_KICK in set_piece_subtypes: - return EventType.CORNER_KICK + qualifiers.append(SetPieceType.CORNER_KICK) elif MS_SET_PIECE_FREE_KICK in set_piece_subtypes: - return EventType.FREE_KICK + qualifiers.append(SetPieceType.FREE_KICK) elif MS_SET_PIECE_PENALTY in set_piece_subtypes: - return EventType.PENALTY + qualifiers.append(SetPieceType.PENALTY) elif MS_SET_PIECE_THROW_IN in set_piece_subtypes: - return EventType.THROW_IN + qualifiers.append(SetPieceType.THROW_IN) elif MS_SET_PIECE_KICK_OFF in set_piece_subtypes: - return EventType.KICK_OFF + qualifiers.append(SetPieceType.KICK_OFF) elif subtypes and MS_SET_PIECE_GOAL_KICK in subtypes: - return EventType.GOAL_KICK - else: - return EventType.PASS + qualifiers.append(SetPieceType.GOAL_KICK) + + return qualifiers def _parse_shot(event: Dict, subtypes: List) -> Dict: @@ -333,7 +335,9 @@ def deserialize( ) 
iteration_events.append( ShotEvent.create( - **shot_event_kwargs, **generic_event_kwargs + qualifiers=None, + **shot_event_kwargs, + **generic_event_kwargs, ) ) @@ -341,32 +345,41 @@ def deserialize( take_on_event_kwargs = _parse_take_on(subtypes=subtypes) iteration_events.append( TakeOnEvent.create( - **take_on_event_kwargs, **generic_event_kwargs + qualifiers=None, + **take_on_event_kwargs, + **generic_event_kwargs, ) ) elif event_type == MS_EVENT_TYPE_CARRY: carry_event_kwargs = _parse_carry(event=raw_event,) iteration_events.append( CarryEvent.create( - **carry_event_kwargs, **generic_event_kwargs, + qualifiers=None, + **carry_event_kwargs, + **generic_event_kwargs, ) ) elif event_type == MS_EVENT_TYPE_RECOVERY: iteration_events.append( RecoveryEvent.create( - result=None, **generic_event_kwargs + result=None, + qualifiers=None, + **generic_event_kwargs, ) ) elif event_type == MS_EVENT_TYPE_FOUL_COMMITTED: iteration_events.append( FoulCommittedEvent.create( - result=None, **generic_event_kwargs + result=None, + qualifiers=None, + **generic_event_kwargs, ) ) elif event_type != MS_SET_PIECE: iteration_events.append( GenericEvent.create( result=None, + qualifiers=None, event_name=raw_event["type"]["name"], **generic_event_kwargs, ) @@ -375,9 +388,16 @@ def deserialize( # Checks if the event was a pass that ended out of the field to add a # synthetic ball_out event. 
if event_type == MS_PASS_OUTCOME_OUT: + + if raw_event["end"]["x"]: + coordinates = _parse_coordinates(raw_event["end"]) + else: + coordinates = _parse_coordinates(raw_event["start"]) + iteration_events.append( BallOutEvent.create( result=None, + qualifiers=None, # from DataRecord period=period, timestamp=raw_event["end"]["time"], @@ -389,7 +409,7 @@ def deserialize( event_id=None, team=team, player=player, - coordinates=(_parse_coordinates(raw_event["end"])), + coordinates=coordinates, raw_event=raw_event, ) ) diff --git a/kloppy/tests/test_metrica.py b/kloppy/tests/test_metrica.py index 6c52f16f..c03bc31e 100644 --- a/kloppy/tests/test_metrica.py +++ b/kloppy/tests/test_metrica.py @@ -8,6 +8,7 @@ Orientation, Point, EventType, + SetPieceType, ) from kloppy.domain.models.common import DatasetType @@ -118,4 +119,4 @@ def test_correct_deserialization(self): ) # Make sure we are using the improved event types. - dataset.records[0].event_type == EventType.KICK_OFF + dataset.records[0].qualifiers[0] == SetPieceType.KICK_OFF From 6d16aea6baad1c9f6096fae8d16c20a0632a394d Mon Sep 17 00:00:00 2001 From: Bruno Date: Tue, 27 Oct 2020 14:01:49 -0300 Subject: [PATCH 3/8] Updated local env to latest black version --- .pre-commit-config.yaml | 2 +- kloppy/cmdline.py | 3 +- .../domain/services/transformers/__init__.py | 30 ++++++++++++------- kloppy/helpers.py | 3 +- .../event/metrica/json_serializer.py | 17 ++++++++--- .../serializers/event/opta/serializer.py | 26 ++++++++++++---- .../serializers/event/statsbomb/serializer.py | 5 +++- .../serializers/tracking/epts/metadata.py | 3 +- kloppy/infra/serializers/tracking/tracab.py | 5 +++- 9 files changed, 68 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b9e8bd0a..d01f1bea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/ambv/black - rev: stable + rev: 20.8b1 hooks: - id: black language_version: python3 \ No 
newline at end of file diff --git a/kloppy/cmdline.py b/kloppy/cmdline.py index 343d8518..2e53680f 100644 --- a/kloppy/cmdline.py +++ b/kloppy/cmdline.py @@ -80,7 +80,8 @@ def run_query(argv=sys.argv[1:]): help="StatsBomb event input files (events.json,lineup.json)", ) parser.add_argument( - "--input-opta", help="Opta event input files (f24.xml,f7.xml)", + "--input-opta", + help="Opta event input files (f24.xml,f7.xml)", ) parser.add_argument("--output-xml", help="Output file") parser.add_argument( diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py index 4b95c8b2..425ee1c2 100644 --- a/kloppy/domain/services/transformers/__init__.py +++ b/kloppy/domain/services/transformers/__init__.py @@ -56,15 +56,19 @@ def __needs_flip( if self._from_orientation == self._to_orientation: flip = False else: - orientation_factor_from = self._from_orientation.get_orientation_factor( - ball_owning_team=ball_owning_team, - attacking_direction=attacking_direction, - action_executing_team=action_executing_team, + orientation_factor_from = ( + self._from_orientation.get_orientation_factor( + ball_owning_team=ball_owning_team, + attacking_direction=attacking_direction, + action_executing_team=action_executing_team, + ) ) - orientation_factor_to = self._to_orientation.get_orientation_factor( - ball_owning_team=ball_owning_team, - attacking_direction=attacking_direction, - action_executing_team=action_executing_team, + orientation_factor_to = ( + self._to_orientation.get_orientation_factor( + ball_owning_team=ball_owning_team, + attacking_direction=attacking_direction, + action_executing_team=action_executing_team, + ) ) flip = orientation_factor_from != orientation_factor_to return flip @@ -149,10 +153,16 @@ def transform_dataset( for record in dataset.records ] - return TrackingDataset(metadata=metadata, records=frames,) + return TrackingDataset( + metadata=metadata, + records=frames, + ) elif isinstance(dataset, EventDataset): 
events = list(map(transformer.transform_event, dataset.records)) - return EventDataset(metadata=metadata, records=events,) + return EventDataset( + metadata=metadata, + records=events, + ) else: raise Exception("Unknown Dataset type") diff --git a/kloppy/helpers.py b/kloppy/helpers.py index 3dc43743..49fddb3d 100644 --- a/kloppy/helpers.py +++ b/kloppy/helpers.py @@ -98,7 +98,8 @@ def load_opta_event_data( ) as f7_data: return serializer.deserialize( - inputs={"f24_data": f24_data, "f7_data": f7_data}, options=options, + inputs={"f24_data": f24_data, "f7_data": f7_data}, + options=options, ) diff --git a/kloppy/infra/serializers/event/metrica/json_serializer.py b/kloppy/infra/serializers/event/metrica/json_serializer.py index 4b03dbaf..839e9820 100644 --- a/kloppy/infra/serializers/event/metrica/json_serializer.py +++ b/kloppy/infra/serializers/event/metrica/json_serializer.py @@ -88,7 +88,10 @@ def _parse_coordinates(event_start_or_end: dict) -> Point: if x is None: return None - return Point(x=x, y=y,) + return Point( + x=x, + y=y, + ) def _parse_subtypes(event: dict) -> List: @@ -325,7 +328,8 @@ def deserialize( iteration_events.append( PassEvent.create( - **pass_event_kwargs, **generic_event_kwargs, + **pass_event_kwargs, + **generic_event_kwargs, ) ) @@ -351,7 +355,9 @@ def deserialize( ) ) elif event_type == MS_EVENT_TYPE_CARRY: - carry_event_kwargs = _parse_carry(event=raw_event,) + carry_event_kwargs = _parse_carry( + event=raw_event, + ) iteration_events.append( CarryEvent.create( qualifiers=None, @@ -421,7 +427,10 @@ def deserialize( ): events.append(event) - return EventDataset(metadata=metadata, records=events,) + return EventDataset( + metadata=metadata, + records=events, + ) def serialize(self, data_set: EventDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/event/opta/serializer.py b/kloppy/infra/serializers/event/opta/serializer.py index 4d8f225a..7493a334 100644 --- 
a/kloppy/infra/serializers/event/opta/serializer.py +++ b/kloppy/infra/serializers/event/opta/serializer.py @@ -348,8 +348,16 @@ def deserialize( game_elm = f24_root.find("Game") periods = [ - Period(id=1, start_timestamp=None, end_timestamp=None,), - Period(id=2, start_timestamp=None, end_timestamp=None,), + Period( + id=1, + start_timestamp=None, + end_timestamp=None, + ), + Period( + id=2, + start_timestamp=None, + end_timestamp=None, + ), ] possession_team = None events = [] @@ -426,17 +434,20 @@ def deserialize( if type_id == EVENT_TYPE_PASS: pass_event_kwargs = _parse_pass(qualifiers, outcome) event = PassEvent.create( - **pass_event_kwargs, **generic_event_kwargs, + **pass_event_kwargs, + **generic_event_kwargs, ) elif type_id == EVENT_TYPE_OFFSIDE_PASS: pass_event_kwargs = _parse_offside_pass() event = PassEvent.create( - **pass_event_kwargs, **generic_event_kwargs, + **pass_event_kwargs, + **generic_event_kwargs, ) elif type_id == EVENT_TYPE_TAKE_ON: take_on_event_kwargs = _parse_take_on(outcome) event = TakeOnEvent.create( - **take_on_event_kwargs, **generic_event_kwargs, + **take_on_event_kwargs, + **generic_event_kwargs, ) elif type_id in ( EVENT_TYPE_SHOT_MISS, @@ -479,7 +490,10 @@ def deserialize( provider=Provider.OPTA, ) - return EventDataset(metadata=metadata, records=events,) + return EventDataset( + metadata=metadata, + records=events, + ) def serialize(self, data_set: EventDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/event/statsbomb/serializer.py b/kloppy/infra/serializers/event/statsbomb/serializer.py index 2c17571b..eda4ece0 100644 --- a/kloppy/infra/serializers/event/statsbomb/serializer.py +++ b/kloppy/infra/serializers/event/statsbomb/serializer.py @@ -532,7 +532,10 @@ def deserialize( provider=Provider.STATSBOMB, ) - return EventDataset(metadata=metadata, records=events,) + return EventDataset( + metadata=metadata, + records=events, + ) def serialize(self, data_set: EventDataset) -> 
Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/tracking/epts/metadata.py b/kloppy/infra/serializers/tracking/epts/metadata.py index 3fa9e8c4..6f61ad20 100644 --- a/kloppy/infra/serializers/tracking/epts/metadata.py +++ b/kloppy/infra/serializers/tracking/epts/metadata.py @@ -157,7 +157,8 @@ def _parse_provider(provider_name: Union[str, None]) -> Provider: return Provider.METRICA else: warnings.warn( - "The Provider is not known to Kloppy.", Warning, + "The Provider is not known to Kloppy.", + Warning, ) else: return None diff --git a/kloppy/infra/serializers/tracking/tracab.py b/kloppy/infra/serializers/tracking/tracab.py index 53a2d5df..9eeeb96e 100644 --- a/kloppy/infra/serializers/tracking/tracab.py +++ b/kloppy/infra/serializers/tracking/tracab.py @@ -242,7 +242,10 @@ def _iter(): flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, ) - return TrackingDataset(records=frames, metadata=metadata,) + return TrackingDataset( + records=frames, + metadata=metadata, + ) def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: raise NotImplementedError From f52ac91b47ca9c5306831f51cd8c1bb5292a5a6b Mon Sep 17 00:00:00 2001 From: Bruno Date: Tue, 3 Nov 2020 13:17:14 -0300 Subject: [PATCH 4/8] Final implementation of expanded events for Metrica Serializer In this implementation we changed: 1. We moved from one-hot encoding for set piece qualifiers to one single column using EnumQualifiers. 2. We added a class Qualifier, with child classes BoolQualifier and EnumQualifier to allow for different types of qualifiers. 
--- kloppy/domain/models/event.py | 35 +++++++++++++++++++ kloppy/helpers.py | 3 +- .../event/metrica/json_serializer.py | 15 ++++---- kloppy/tests/test_metrica.py | 2 +- kloppy/utils.py | 4 +++ 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 6aedd214..27836a1e 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -5,6 +5,7 @@ from typing import List, Union, Dict from kloppy.domain.models.common import DatasetType +from kloppy.utils import camelcase_to_snakecase, removes_suffix from .common import DataRecord, Dataset, Team, Player from .pitch import Point @@ -81,6 +82,34 @@ class EventType(Enum): FOUL_COMMITTED = "FOUL_COMMITTED" +@dataclass +class Qualifier(ABC): + @abstractmethod + def to_dict(self): + pass + + @property + def name(self): + return camelcase_to_snakecase( + removes_suffix(type(self).__name__, "Qualifier") + ) + + +@dataclass +class BoolQualifier(Qualifier, ABC): + value: bool + + def to_dict(self): + return {f"is_{self.name}": self.value} + + +class EnumQualifier(Qualifier, ABC): + value: Enum + + def to_dict(self): + return {f"{self.name}_type": self.value.value} + + class SetPieceType(Enum): GOAL_KICK = "GOAL_KICK" FREE_KICK = "FREE_KICK" @@ -90,6 +119,11 @@ class SetPieceType(Enum): KICK_OFF = "KICK_OFF" +@dataclass +class SetPieceQualifier(EnumQualifier): + value: SetPieceType + + @dataclass class Event(DataRecord, ABC): event_id: str @@ -261,4 +295,5 @@ def add_state(self, *args, **kwargs): "FoulCommittedEvent", "BallOutEvent", "SetPieceType", + "SetPieceQualifier", ] diff --git a/kloppy/helpers.py b/kloppy/helpers.py index 49fddb3d..e31958b4 100644 --- a/kloppy/helpers.py +++ b/kloppy/helpers.py @@ -206,8 +206,7 @@ def _event_to_pandas_row_converter(event: Event) -> Dict: if event.qualifiers: for qualifier in event.qualifiers: - if isinstance(qualifier, SetPieceType): - row.update({f"is_{qualifier.name}": True}) + 
row.update(qualifier.to_dict()) return row diff --git a/kloppy/infra/serializers/event/metrica/json_serializer.py b/kloppy/infra/serializers/event/metrica/json_serializer.py index 839e9820..e8fbe4d0 100644 --- a/kloppy/infra/serializers/event/metrica/json_serializer.py +++ b/kloppy/infra/serializers/event/metrica/json_serializer.py @@ -22,6 +22,7 @@ CarryResult, EventType, SetPieceType, + SetPieceQualifier, ) from kloppy.infra.serializers.event import EventDataSerializer @@ -150,17 +151,19 @@ def _get_pass_qualifiers( if previous_event_type_id == MS_SET_PIECE: set_piece_subtypes = _parse_subtypes(previous_event) if MS_SET_PIECE_CORNER_KICK in set_piece_subtypes: - qualifiers.append(SetPieceType.CORNER_KICK) + qualifiers.append( + SetPieceQualifier(value=SetPieceType.CORNER_KICK) + ) elif MS_SET_PIECE_FREE_KICK in set_piece_subtypes: - qualifiers.append(SetPieceType.FREE_KICK) + qualifiers.append(SetPieceQualifier(value=SetPieceType.FREE_KICK)) elif MS_SET_PIECE_PENALTY in set_piece_subtypes: - qualifiers.append(SetPieceType.PENALTY) + qualifiers.append(SetPieceQualifier(value=SetPieceType.PENALTY)) elif MS_SET_PIECE_THROW_IN in set_piece_subtypes: - qualifiers.append(SetPieceType.THROW_IN) + qualifiers.append(SetPieceQualifier(value=SetPieceType.THROW_IN)) elif MS_SET_PIECE_KICK_OFF in set_piece_subtypes: - qualifiers.append(SetPieceType.KICK_OFF) + qualifiers.append(SetPieceQualifier(value=SetPieceType.KICK_OFF)) elif subtypes and MS_SET_PIECE_GOAL_KICK in subtypes: - qualifiers.append(SetPieceType.GOAL_KICK) + qualifiers.append(SetPieceQualifier(value=SetPieceType.GOAL_KICK)) return qualifiers diff --git a/kloppy/tests/test_metrica.py b/kloppy/tests/test_metrica.py index c03bc31e..11078a09 100644 --- a/kloppy/tests/test_metrica.py +++ b/kloppy/tests/test_metrica.py @@ -119,4 +119,4 @@ def test_correct_deserialization(self): ) # Make sure we are using the improved event types. 
- dataset.records[0].qualifiers[0] == SetPieceType.KICK_OFF + dataset.records[0].qualifiers[0].value == SetPieceType.KICK_OFF diff --git a/kloppy/utils.py b/kloppy/utils.py index b88a67f5..01a69f29 100644 --- a/kloppy/utils.py +++ b/kloppy/utils.py @@ -44,3 +44,7 @@ def camelcase_to_snakecase(name): """Convert camel-case string to snake-case.""" s1 = _first_cap_re.sub(r"\1_\2", name) return _all_cap_re.sub(r"\1_\2", s1).lower() + + +def removes_suffix(string, suffix): + return string[: -len(suffix)] From e180914312a66026181ec597a70e6cda3b9c8b8d Mon Sep 17 00:00:00 2001 From: Bruno Date: Wed, 4 Nov 2020 22:59:42 -0300 Subject: [PATCH 5/8] Added generic events to StatsBomb serializer For Recoveries I used the ones as defined by Statsbomb. For BallOut events I used either a pass out or a take-on out. I added the end location for passes that are not completed, as that was not included before but the data was available. --- kloppy/domain/models/event.py | 1 + .../event/metrica/json_serializer.py | 146 ++++++++---------- .../serializers/event/statsbomb/serializer.py | 107 +++++++++++-- kloppy/tests/test_metrica.py | 4 +- kloppy/tests/test_statsbomb.py | 2 +- 5 files changed, 165 insertions(+), 95 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 27836a1e..b8ffd797 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -295,5 +295,6 @@ def add_state(self, *args, **kwargs): "FoulCommittedEvent", "BallOutEvent", "SetPieceType", + "Qualifier", "SetPieceQualifier", ] diff --git a/kloppy/infra/serializers/event/metrica/json_serializer.py b/kloppy/infra/serializers/event/metrica/json_serializer.py index e8fbe4d0..e6f7db55 100644 --- a/kloppy/infra/serializers/event/metrica/json_serializer.py +++ b/kloppy/infra/serializers/event/metrica/json_serializer.py @@ -23,6 +23,8 @@ EventType, SetPieceType, SetPieceQualifier, + Qualifier, + Event, ) from kloppy.infra.serializers.event import EventDataSerializer @@ 
-82,6 +84,8 @@ MS_EVENT_TYPE_FOUL_COMMITTED = 4 MS_EVENT_TYPE_CARD = 8 +OUT_EVENT_RESULTS = [PassResult.OUT] + def _parse_coordinates(event_start_or_end: dict) -> Point: x = event_start_or_end["x"] @@ -131,7 +135,7 @@ def _parse_pass( receiver_coordinates = None receive_timestamp = None - qualifiers = _get_pass_qualifiers(event, previous_event, subtypes) + qualifiers = _get_event_qualifiers(event, previous_event, subtypes) return dict( result=result, @@ -142,9 +146,9 @@ def _parse_pass( ) -def _get_pass_qualifiers( +def _get_event_qualifiers( event: Dict, previous_event: Dict, subtypes: List -) -> EventType: +) -> List[Qualifier]: previous_event_type_id = previous_event["type"]["id"] qualifiers = [] @@ -168,7 +172,7 @@ def _get_pass_qualifiers( return qualifiers -def _parse_shot(event: Dict, subtypes: List) -> Dict: +def _parse_shot(event: Dict, previous_event: Dict, subtypes: List) -> Dict: if MS_SHOT_OUTCOME_OFF_TARGET in subtypes: result = ShotResult.OFF_TARGET elif MS_SHOT_OUTCOME_SAVED in subtypes: @@ -182,7 +186,9 @@ def _parse_shot(event: Dict, subtypes: List) -> Dict: else: raise Exception(f"Unknown shot outcome") - return dict(result=result) + qualifiers = _get_event_qualifiers(event, previous_event, subtypes) + + return dict(result=result, qualifiers=qualifiers) def _parse_carry(event: Dict) -> Dict: @@ -212,6 +218,10 @@ def _parse_ball_owning_team(event_type: int, team: Team) -> Team: return None +def _include_event(event: Event, wanted_event_types: List) -> bool: + return not wanted_event_types or event.event_type in wanted_event_types + + class MetricaEventsJsonSerializer(EventDataSerializer): @staticmethod def __validate_inputs(inputs: Dict[str, Readable]): @@ -303,6 +313,7 @@ def deserialize( for period in metadata.periods if period.id == raw_event["period"] ][0] + previous_event = raw_events["data"][i - 1] generic_event_kwargs = dict( # from DataRecord @@ -321,7 +332,6 @@ def deserialize( iteration_events = [] if event_type in MS_PASS_TYPES: - 
previous_event = raw_events["data"][i - 1] pass_event_kwargs = _parse_pass( event=raw_event, previous_event=previous_event, @@ -329,106 +339,84 @@ def deserialize( team=team, ) - iteration_events.append( - PassEvent.create( - **pass_event_kwargs, - **generic_event_kwargs, - ) + event = PassEvent.create( + **pass_event_kwargs, + **generic_event_kwargs, ) elif event_type == MS_EVENT_TYPE_SHOT: shot_event_kwargs = _parse_shot( - event=raw_event, subtypes=subtypes + event=raw_event, + previous_event=previous_event, + subtypes=subtypes, ) - iteration_events.append( - ShotEvent.create( - qualifiers=None, - **shot_event_kwargs, - **generic_event_kwargs, - ) + event = ShotEvent.create( + **shot_event_kwargs, + **generic_event_kwargs, ) elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes: take_on_event_kwargs = _parse_take_on(subtypes=subtypes) - iteration_events.append( - TakeOnEvent.create( - qualifiers=None, - **take_on_event_kwargs, - **generic_event_kwargs, - ) + event = TakeOnEvent.create( + qualifiers=None, + **take_on_event_kwargs, + **generic_event_kwargs, ) + elif event_type == MS_EVENT_TYPE_CARRY: carry_event_kwargs = _parse_carry( event=raw_event, ) - iteration_events.append( - CarryEvent.create( - qualifiers=None, - **carry_event_kwargs, - **generic_event_kwargs, - ) + event = CarryEvent.create( + qualifiers=None, + **carry_event_kwargs, + **generic_event_kwargs, ) + elif event_type == MS_EVENT_TYPE_RECOVERY: - iteration_events.append( - RecoveryEvent.create( - result=None, - qualifiers=None, - **generic_event_kwargs, - ) + event = RecoveryEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, ) + elif event_type == MS_EVENT_TYPE_FOUL_COMMITTED: - iteration_events.append( - FoulCommittedEvent.create( - result=None, - qualifiers=None, - **generic_event_kwargs, - ) + event = FoulCommittedEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, ) - elif event_type != MS_SET_PIECE: - iteration_events.append( - 
GenericEvent.create( - result=None, - qualifiers=None, - event_name=raw_event["type"]["name"], - **generic_event_kwargs, - ) + + else: + event = GenericEvent.create( + result=None, + qualifiers=None, + event_name=raw_event["type"]["name"], + **generic_event_kwargs, ) - # Checks if the event was a pass that ended out of the field to add a - # synthetic ball_out event. - if event_type == MS_PASS_OUTCOME_OUT: + if _include_event(event, wanted_event_types): + events.append(event) + # Checks if the event ended out of the field and adds a synthetic out event + if event.result in OUT_EVENT_RESULTS: + generic_event_kwargs["ball_state"] = BallState.DEAD if raw_event["end"]["x"]: - coordinates = _parse_coordinates(raw_event["end"]) - else: - coordinates = _parse_coordinates(raw_event["start"]) - - iteration_events.append( - BallOutEvent.create( + generic_event_kwargs[ + "coordinates" + ] = _parse_coordinates(raw_event["end"]) + generic_event_kwargs["timestamp"] = raw_event["end"][ + "time" + ] + + event = BallOutEvent.create( result=None, qualifiers=None, - # from DataRecord - period=period, - timestamp=raw_event["end"]["time"], - ball_owning_team=_parse_ball_owning_team( - event_type, team - ), - ball_state=BallState.DEAD, - # from Event - event_id=None, - team=team, - player=player, - coordinates=coordinates, - raw_event=raw_event, + **generic_event_kwargs, ) - ) - for event in iteration_events: - if ( - not wanted_event_types - or event.event_type in wanted_event_types - ): - events.append(event) + if _include_event(event, wanted_event_types): + events.append(event) return EventDataset( metadata=metadata, diff --git a/kloppy/infra/serializers/event/statsbomb/serializer.py b/kloppy/infra/serializers/event/statsbomb/serializer.py index eda4ece0..23503d1e 100644 --- a/kloppy/infra/serializers/event/statsbomb/serializer.py +++ b/kloppy/infra/serializers/event/statsbomb/serializer.py @@ -31,13 +31,20 @@ PlayerOnEvent, PlayerOffEvent, CardType, + Qualifier, + SetPieceQualifier, 
+ SetPieceType, + RecoveryEvent, + FoulCommittedEvent, + BallOutEvent, + Event, ) from kloppy.infra.serializers.event import EventDataSerializer from kloppy.utils import Readable, performance_logging logger = logging.getLogger(__name__) - +SB_EVENT_TYPE_RECOVERY = 2 SB_EVENT_TYPE_DRIBBLE = 14 SB_EVENT_TYPE_SHOT = 16 SB_EVENT_TYPE_PASS = 30 @@ -67,6 +74,15 @@ SB_SHOT_OUTCOME_SAVED = 100 SB_SHOT_OUTCOME_OFF_WAYWARD = 101 +SB_EVENT_TYPE_FREE_KICK = 62 +SB_EVENT_TYPE_THROW_IN = 67 +SB_EVENT_TYPE_KICK_OFF = 65 +SB_EVENT_TYPE_CORNER_KICK = 61 +SB_EVENT_TYPE_PENALTY = 88 +SB_EVENT_TYPE_GOAL_KICK = 63 + +OUT_EVENT_RESULTS = [PassResult.OUT, TakeOnResult.OUT] + def parse_str_ts(timestamp: str) -> float: h, m, s = timestamp.split(":") @@ -108,21 +124,45 @@ def _parse_pass(pass_dict: Dict, team: Team, fidelity_version: int) -> Dict: raise Exception(f"Unknown pass outcome: {outcome_id}") receiver_player = None - receiver_coordinates = None else: result = PassResult.COMPLETE receiver_player = team.get_player_by_id(pass_dict["recipient"]["id"]) - receiver_coordinates = _parse_coordinates( - pass_dict["end_location"], fidelity_version - ) + + receiver_coordinates = _parse_coordinates( + pass_dict["end_location"], fidelity_version + ) + + qualifiers = _get_event_qualifiers(pass_dict) return dict( result=result, receiver_coordinates=receiver_coordinates, receiver_player=receiver_player, + qualifiers=qualifiers, ) +def _get_event_qualifiers(qualifiers_dict: Dict) -> List[Qualifier]: + qualifiers = [] + if "type" in qualifiers_dict: + if qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_CORNER_KICK: + qualifiers.append( + SetPieceQualifier(value=SetPieceType.CORNER_KICK) + ) + elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_CORNER_KICK: + qualifiers.append(SetPieceQualifier(value=SetPieceType.FREE_KICK)) + elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_PENALTY: + qualifiers.append(SetPieceQualifier(value=SetPieceType.PENALTY)) + elif qualifiers_dict["type"]["id"] == 
SB_EVENT_TYPE_THROW_IN: + qualifiers.append(SetPieceQualifier(value=SetPieceType.THROW_IN)) + elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_KICK_OFF: + qualifiers.append(SetPieceQualifier(value=SetPieceType.KICK_OFF)) + elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_GOAL_KICK: + qualifiers.append(SetPieceQualifier(value=SetPieceType.GOAL_KICK)) + + return qualifiers + + def _parse_shot(shot_dict: Dict) -> Dict: outcome_id = shot_dict["outcome"]["id"] if outcome_id == SB_SHOT_OUTCOME_OFF_TARGET: @@ -140,7 +180,9 @@ def _parse_shot(shot_dict: Dict) -> Dict: else: raise Exception(f"Unknown shot outcome: {outcome_id}") - return dict(result=result) + qualifiers = _get_event_qualifiers(shot_dict) + + return dict(result=result, qualifiers=qualifiers) def _parse_carry(carry_dict: Dict, fidelity_version: int) -> Dict: @@ -225,6 +267,10 @@ def _determine_xy_fidelity_versions(events: List[Dict]) -> Tuple[int, int]: return shot_fidelity_version, xy_fidelity_version +def _include_event(event: Event, wanted_event_types: List) -> bool: + return not wanted_event_types or event.event_type in wanted_event_types + + class StatsBombSerializer(EventDataSerializer): @staticmethod def __validate_inputs(inputs: Dict[str, Readable]): @@ -450,7 +496,9 @@ def deserialize( take_on_dict=raw_event["dribble"] ) event = TakeOnEvent.create( - **take_on_event_kwargs, **generic_event_kwargs + qualifiers=None, + **take_on_event_kwargs, + **generic_event_kwargs, ) elif event_type == SB_EVENT_TYPE_CARRY: carry_event_kwargs = _parse_carry( @@ -458,6 +506,7 @@ def deserialize( fidelity_version=fidelity_version, ) event = CarryEvent.create( + qualifiers=None, # TODO: Consider moving this to _parse_carry end_timestamp=timestamp + raw_event["duration"], **carry_event_kwargs, @@ -471,6 +520,7 @@ def deserialize( ) event = SubstitutionEvent.create( result=None, + qualifiers=None, **substitution_event_kwargs, **generic_event_kwargs, ) @@ -481,6 +531,7 @@ def deserialize( if card_kwargs["card_type"]: 
event = CardEvent.create( result=None, + qualifiers=None, card_type=card_kwargs["card_type"], **generic_event_kwargs, ) @@ -493,32 +544,62 @@ def deserialize( if card_kwargs["card_type"]: event = CardEvent.create( result=None, + qualifiers=None, card_type=card_kwargs["card_type"], **generic_event_kwargs, ) elif event_type == SB_EVENT_TYPE_PLAYER_ON: event = PlayerOnEvent.create( - result=None, **generic_event_kwargs + result=None, qualifiers=None, **generic_event_kwargs ) elif event_type == SB_EVENT_TYPE_PLAYER_OFF: event = PlayerOffEvent.create( - result=None, **generic_event_kwargs + result=None, qualifiers=None, **generic_event_kwargs + ) + + elif event_type == SB_EVENT_TYPE_RECOVERY: + event = RecoveryEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, + ) + + elif event_type == SB_EVENT_TYPE_FOUL_COMMITTED: + event = FoulCommittedEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, ) # rest: generic else: event = GenericEvent.create( result=None, + qualifiers=None, event_name=raw_event["type"]["name"], **generic_event_kwargs, ) - if ( - not wanted_event_types - or event.event_type in wanted_event_types - ): + if _include_event(event, wanted_event_types): events.append(event) + # Checks if the event ended out of the field and adds a synthetic out event + if event.result in OUT_EVENT_RESULTS: + generic_event_kwargs["ball_state"] = BallState.DEAD + if event.receiver_coordinates: + generic_event_kwargs[ + "coordinates" + ] = event.receiver_coordinates + + event = BallOutEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, + ) + + if _include_event(event, wanted_event_types): + events.append(event) + metadata = Metadata( teams=teams, periods=periods, diff --git a/kloppy/tests/test_metrica.py b/kloppy/tests/test_metrica.py index 11078a09..72b5bb98 100644 --- a/kloppy/tests/test_metrica.py +++ b/kloppy/tests/test_metrica.py @@ -93,7 +93,7 @@ def test_correct_deserialization(self): assert 
dataset.metadata.provider == Provider.METRICA assert dataset.dataset_type == DatasetType.EVENT - assert len(dataset.events) == 3594 + assert len(dataset.events) == 3684 assert len(dataset.metadata.periods) == 2 assert dataset.metadata.orientation is None assert dataset.metadata.teams[0].name == "Team A" @@ -119,4 +119,4 @@ def test_correct_deserialization(self): ) # Make sure we are using the improved event types. - dataset.records[0].qualifiers[0].value == SetPieceType.KICK_OFF + dataset.records[1].qualifiers[0].value == SetPieceType.KICK_OFF diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 2068403d..5c5bda31 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -36,7 +36,7 @@ def test_correct_deserialization(self): assert dataset.metadata.provider == Provider.STATSBOMB assert dataset.dataset_type == DatasetType.EVENT - assert len(dataset.events) == 4002 + assert len(dataset.events) == 4022 assert len(dataset.metadata.periods) == 2 assert ( dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM From 4419cc3e52c771dd5471f65735f3a27f12d86faa Mon Sep 17 00:00:00 2001 From: Bruno Date: Thu, 5 Nov 2020 00:15:27 -0300 Subject: [PATCH 6/8] Added new event types to Opta serializer --- .../serializers/event/opta/serializer.py | 281 +++++++++++------- .../serializers/event/statsbomb/serializer.py | 2 +- kloppy/tests/test_helpers.py | 2 +- kloppy/tests/test_state_builder.py | 6 +- 4 files changed, 185 insertions(+), 106 deletions(-) diff --git a/kloppy/infra/serializers/event/opta/serializer.py b/kloppy/infra/serializers/event/opta/serializer.py index 7493a334..27972ffa 100644 --- a/kloppy/infra/serializers/event/opta/serializer.py +++ b/kloppy/infra/serializers/event/opta/serializer.py @@ -30,12 +30,133 @@ Metadata, Player, Position, + RecoveryEvent, + BallOutEvent, + FoulCommittedEvent, + Qualifier, + SetPieceQualifier, + SetPieceType, ) from kloppy.infra.serializers.event import EventDataSerializer 
from kloppy.utils import Readable, performance_logging logger = logging.getLogger(__name__) +EVENT_TYPE_START_PERIOD = 32 +EVENT_TYPE_END_PERIOD = 30 + +EVENT_TYPE_PASS = 1 +EVENT_TYPE_OFFSIDE_PASS = 2 +EVENT_TYPE_TAKE_ON = 3 +EVENT_TYPE_SHOT_MISS = 13 +EVENT_TYPE_SHOT_POST = 14 +EVENT_TYPE_SHOT_SAVED = 15 +EVENT_TYPE_SHOT_GOAL = 16 +EVENT_TYPE_BALL_OUT = 5 +EVENT_TYPE_CORNER_AWARDED = 6 +EVENT_TYPE_FOUL_COMMITTED = 4 +EVENT_TYPE_RECOVERY = 49 + +BALL_OUT_EVENTS = [EVENT_TYPE_BALL_OUT, EVENT_TYPE_CORNER_AWARDED] + +BALL_OWNING_EVENTS = ( + EVENT_TYPE_PASS, + EVENT_TYPE_OFFSIDE_PASS, + EVENT_TYPE_TAKE_ON, + EVENT_TYPE_SHOT_MISS, + EVENT_TYPE_SHOT_POST, + EVENT_TYPE_SHOT_SAVED, + EVENT_TYPE_SHOT_GOAL, + EVENT_TYPE_RECOVERY, +) + +EVENT_QUALIFIER_GOAL_KICK = 124 +EVENT_QUALIFIER_FREE_KICK = 5 +EVENT_QUALIFIER_THROW_IN = 107 +EVENT_QUALIFIER_CORNER_KICK = 6 +EVENT_QUALIFIER_PENALTY = 9 +EVENT_QUALIFIER_KICK_OFF = 279 + +event_type_names = { + 1: "pass", + 2: "offside pass", + 3: "take on", + 4: "foul", + 5: "out", + 6: "corner awarded", + 7: "tackle", + 8: "interception", + 9: "turnover", + 10: "save", + 11: "claim", + 12: "clearance", + 13: "miss", + 14: "post", + 15: "attempt saved", + 16: "goal", + 17: "card", + 18: "player off", + 19: "player on", + 20: "player retired", + 21: "player returns", + 22: "player becomes goalkeeper", + 23: "goalkeeper becomes player", + 24: "condition change", + 25: "official change", + 26: "unknown26", + 27: "start delay", + 28: "end delay", + 29: "unknown29", + 30: "end", + 31: "unknown31", + 32: "start", + 33: "unknown33", + 34: "team set up", + 35: "player changed position", + 36: "player changed jersey number", + 37: "collection end", + 38: "temp_goal", + 39: "temp_attempt", + 40: "formation change", + 41: "punch", + 42: "good skill", + 43: "deleted event", + 44: "aerial", + 45: "challenge", + 46: "unknown46", + 47: "rescinded card", + 48: "unknown46", + 49: "ball recovery", + 50: "dispossessed", + 51: "error", + 52: "keeper 
pick-up", + 53: "cross not claimed", + 54: "smother", + 55: "offside provoked", + 56: "shield ball opp", + 57: "foul throw in", + 58: "penalty faced", + 59: "keeper sweeper", + 60: "chance missed", + 61: "ball touch", + 62: "unknown62", + 63: "temp_save", + 64: "resume", + 65: "contentious referee decision", + 66: "possession data", + 67: "50/50", + 68: "referee drop ball", + 69: "failed to block", + 70: "injury time announcement", + 71: "coach setup", + 72: "caught offside", + 73: "other ball contact", + 74: "blocked pass", + 75: "delayed start", + 76: "early end", + 77: "player off pitch", +} + def _parse_f24_datetime(dt_str: str) -> float: return ( @@ -45,30 +166,35 @@ def _parse_f24_datetime(dt_str: str) -> float: ) -def _parse_pass(qualifiers: Dict[int, str], outcome: int) -> Dict: +def _parse_pass(raw_qualifiers: Dict[int, str], outcome: int) -> Dict: if outcome: receiver_coordinates = Point( - x=float(qualifiers[140]), y=float(qualifiers[141]) + x=float(raw_qualifiers[140]), y=float(raw_qualifiers[141]) ) result = PassResult.COMPLETE else: result = PassResult.INCOMPLETE receiver_coordinates = None + qualifiers = _get_event_qualifiers(raw_qualifiers) + return dict( result=result, receiver_coordinates=receiver_coordinates, receiver_player=None, receive_timestamp=None, + qualifiers=qualifiers, ) -def _parse_offside_pass() -> Dict: +def _parse_offside_pass(raw_qualifiers: List) -> Dict: + qualifiers = _get_event_qualifiers(raw_qualifiers) return dict( result=PassResult.OFFSIDE, receiver_coordinates=None, receiver_player=None, receive_timestamp=None, + qualifiers=qualifiers, ) @@ -81,16 +207,18 @@ def _parse_take_on(outcome: int) -> Dict: def _parse_shot( - qualifiers: Dict[int, str], type_id: int, coordinates: Point + raw_qualifiers: Dict[int, str], type_id: int, coordinates: Point ) -> Dict: if type_id == EVENT_TYPE_SHOT_GOAL: - if 28 in qualifiers: + if 28 in raw_qualifiers: coordinates = Point(x=100 - coordinates.x, y=100 - coordinates.y) result = 
ShotResult.GOAL else: result = None - return dict(coordinates=coordinates, result=result) + qualifiers = _get_event_qualifiers(raw_qualifiers) + + return dict(coordinates=coordinates, result=result, qualifiers=qualifiers) def _parse_team_players( @@ -155,98 +283,22 @@ def _team_from_xml_elm(team_elm, f7_root) -> Team: return team -EVENT_TYPE_START_PERIOD = 32 -EVENT_TYPE_END_PERIOD = 30 +def _get_event_qualifiers(raw_qualifiers: List) -> List[Qualifier]: + qualifiers = [] + if EVENT_QUALIFIER_CORNER_KICK in raw_qualifiers: + qualifiers.append(SetPieceQualifier(value=SetPieceType.CORNER_KICK)) + elif EVENT_QUALIFIER_FREE_KICK in raw_qualifiers: + qualifiers.append(SetPieceQualifier(value=SetPieceType.FREE_KICK)) + elif EVENT_QUALIFIER_PENALTY in raw_qualifiers: + qualifiers.append(SetPieceQualifier(value=SetPieceType.PENALTY)) + elif EVENT_QUALIFIER_THROW_IN in raw_qualifiers: + qualifiers.append(SetPieceQualifier(value=SetPieceType.THROW_IN)) + elif EVENT_QUALIFIER_KICK_OFF in raw_qualifiers: + qualifiers.append(SetPieceQualifier(value=SetPieceType.KICK_OFF)) + elif EVENT_QUALIFIER_GOAL_KICK in raw_qualifiers: + qualifiers.append(SetPieceQualifier(value=SetPieceType.GOAL_KICK)) -EVENT_TYPE_PASS = 1 -EVENT_TYPE_OFFSIDE_PASS = 1 -EVENT_TYPE_TAKE_ON = 3 -EVENT_TYPE_SHOT_MISS = 13 -EVENT_TYPE_SHOT_POST = 14 -EVENT_TYPE_SHOT_SAVED = 15 -EVENT_TYPE_SHOT_GOAL = 16 - -event_type_names = { - 1: "pass", - 2: "offside pass", - 3: "take on", - 4: "foul", - 5: "out", - 6: "corner awarded", - 7: "tackle", - 8: "interception", - 9: "turnover", - 10: "save", - 11: "claim", - 12: "clearance", - 13: "miss", - 14: "post", - 15: "attempt saved", - 16: "goal", - 17: "card", - 18: "player off", - 19: "player on", - 20: "player retired", - 21: "player returns", - 22: "player becomes goalkeeper", - 23: "goalkeeper becomes player", - 24: "condition change", - 25: "official change", - 26: "unknown26", - 27: "start delay", - 28: "end delay", - 29: "unknown29", - 30: "end", - 31: "unknown31", 
- 32: "start", - 33: "unknown33", - 34: "team set up", - 35: "player changed position", - 36: "player changed jersey number", - 37: "collection end", - 38: "temp_goal", - 39: "temp_attempt", - 40: "formation change", - 41: "punch", - 42: "good skill", - 43: "deleted event", - 44: "aerial", - 45: "challenge", - 46: "unknown46", - 47: "rescinded card", - 48: "unknown46", - 49: "ball recovery", - 50: "dispossessed", - 51: "error", - 52: "keeper pick-up", - 53: "cross not claimed", - 54: "smother", - 55: "offside provoked", - 56: "shield ball opp", - 57: "foul throw in", - 58: "penalty faced", - 59: "keeper sweeper", - 60: "chance missed", - 61: "ball touch", - 62: "unknown62", - 63: "temp_save", - 64: "resume", - 65: "contentious referee decision", - 66: "possession data", - 67: "50/50", - 68: "referee drop ball", - 69: "failed to block", - 70: "injury time announcement", - 71: "coach setup", - 72: "caught offside", - 73: "other ball contact", - 74: "blocked pass", - 75: "delayed start", - 76: "early end", - 77: "player off pitch", -} - -BALL_OWNING_EVENTS = (1, 2, 3, 13, 14, 15, 16, 49) + return qualifiers def _get_event_type_name(type_id: int) -> str: @@ -402,7 +454,7 @@ def deserialize( x = float(event_elm.attrib["x"]) y = float(event_elm.attrib["y"]) outcome = int(event_elm.attrib["outcome"]) - qualifiers = { + raw_qualifiers = { int( qualifier_elm.attrib["qualifier_id"] ): qualifier_elm.attrib.get("value") @@ -432,13 +484,15 @@ def deserialize( ) if type_id == EVENT_TYPE_PASS: - pass_event_kwargs = _parse_pass(qualifiers, outcome) + pass_event_kwargs = _parse_pass( + raw_qualifiers, outcome + ) event = PassEvent.create( **pass_event_kwargs, **generic_event_kwargs, ) elif type_id == EVENT_TYPE_OFFSIDE_PASS: - pass_event_kwargs = _parse_offside_pass() + pass_event_kwargs = _parse_offside_pass(raw_qualifiers) event = PassEvent.create( **pass_event_kwargs, **generic_event_kwargs, @@ -446,6 +500,7 @@ def deserialize( elif type_id == EVENT_TYPE_TAKE_ON: 
take_on_event_kwargs = _parse_take_on(outcome) event = TakeOnEvent.create( + qualifiers=None, **take_on_event_kwargs, **generic_event_kwargs, ) @@ -456,7 +511,7 @@ def deserialize( EVENT_TYPE_SHOT_GOAL, ): shot_event_kwargs = _parse_shot( - qualifiers, + raw_qualifiers, type_id, coordinates=generic_event_kwargs["coordinates"], ) @@ -464,10 +519,34 @@ def deserialize( kwargs.update(generic_event_kwargs) kwargs.update(shot_event_kwargs) event = ShotEvent.create(**kwargs) + + elif type_id == EVENT_TYPE_RECOVERY: + event = RecoveryEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, + ) + + elif type_id == EVENT_TYPE_FOUL_COMMITTED: + event = FoulCommittedEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, + ) + + elif type_id in BALL_OUT_EVENTS: + generic_event_kwargs["ball_state"] = BallState.DEAD + event = BallOutEvent.create( + result=None, + qualifiers=None, + **generic_event_kwargs, + ) + else: event = GenericEvent.create( **generic_event_kwargs, result=None, + qualifiers=None, event_name=_get_event_type_name(type_id), ) diff --git a/kloppy/infra/serializers/event/statsbomb/serializer.py b/kloppy/infra/serializers/event/statsbomb/serializer.py index 23503d1e..ae39017e 100644 --- a/kloppy/infra/serializers/event/statsbomb/serializer.py +++ b/kloppy/infra/serializers/event/statsbomb/serializer.py @@ -149,7 +149,7 @@ def _get_event_qualifiers(qualifiers_dict: Dict) -> List[Qualifier]: qualifiers.append( SetPieceQualifier(value=SetPieceType.CORNER_KICK) ) - elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_CORNER_KICK: + elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_FREE_KICK: qualifiers.append(SetPieceQualifier(value=SetPieceType.FREE_KICK)) elif qualifiers_dict["type"]["id"] == SB_EVENT_TYPE_PENALTY: qualifiers.append(SetPieceQualifier(value=SetPieceType.PENALTY)) diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index 9df0c683..62b98a10 100644 --- a/kloppy/tests/test_helpers.py +++ 
b/kloppy/tests/test_helpers.py @@ -164,7 +164,7 @@ def test_to_pandas_generic_events(self): ) dataframe = to_pandas(dataset) - dataframe = dataframe[dataframe.event_type == "GENERIC:out"] + dataframe = dataframe[dataframe.event_type == "BALL_OUT"] assert dataframe.shape[0] == 2 def test_to_pandas_additional_columns(self): diff --git a/kloppy/tests/test_state_builder.py b/kloppy/tests/test_state_builder.py index 8d25445c..394c9d1f 100644 --- a/kloppy/tests/test_state_builder.py +++ b/kloppy/tests/test_state_builder.py @@ -38,9 +38,9 @@ def test_score_state_builder(self): events_per_score[str(score)] = len(events) assert events_per_score == { - "0-0": 2884, - "1-0": 711, - "2-0": 404, + "0-0": 2897, + "1-0": 717, + "2-0": 405, "3-0": 3, } From 6c0d99db73c6202846dc3823713e1ce63191e84e Mon Sep 17 00:00:00 2001 From: koenvo Date: Fri, 6 Nov 2020 11:22:36 +0100 Subject: [PATCH 7/8] Update event.py Minor fix --- kloppy/domain/models/event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index b8ffd797..8f35391a 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -136,7 +136,7 @@ class Event(DataRecord, ABC): raw_event: Dict state: Dict[str, any] - qualifiers: List[SetPieceType] + qualifiers: List[Qualifier] @property @abstractmethod From 454f031c24d6121123d6607c25fa44f6c3442ff0 Mon Sep 17 00:00:00 2001 From: koenvo Date: Fri, 6 Nov 2020 11:23:53 +0100 Subject: [PATCH 8/8] Update utils.py --- kloppy/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kloppy/utils.py b/kloppy/utils.py index 01a69f29..81fd179e 100644 --- a/kloppy/utils.py +++ b/kloppy/utils.py @@ -47,4 +47,7 @@ def camelcase_to_snakecase(name): def removes_suffix(string, suffix): - return string[: -len(suffix)] + if string[-len(suffix):] == suffix: + return string[: -len(suffix)] + else: + return string