Skip to content

Commit

Permalink
Add current formation and opponent formation info to event, start with parser for Opta
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesDeprest committed Dec 1, 2023
1 parent 096f959 commit 581afcc
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 9 deletions.
2 changes: 2 additions & 0 deletions kloppy/domain/models/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ class Event(DataRecord, ABC):
raw_event: Dict
state: Dict[str, Any]
related_event_ids: List[str]
formation: Optional[FormationType]
opponent_formation: Optional[FormationType]

qualifiers: List[Qualifier]

Expand Down
67 changes: 59 additions & 8 deletions kloppy/infra/serializers/event/opta/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
EVENT_TYPE_CORNER_AWARDED = 6
EVENT_TYPE_FOUL_COMMITTED = 4
EVENT_TYPE_CARD = 17
EVENT_TYPE_TEAM_SET_UP = 34
EVENT_TYPE_RECOVERY = 49
EVENT_TYPE_FORMATION_CHANGE = 40
EVENT_TYPE_BALL_TOUCH = 61
Expand Down Expand Up @@ -698,6 +699,8 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset:
for event in list(game_elm.iterchildren("Event"))
if int(event.attrib["type_id"]) != EVENT_TYPE_DELETED_EVENT
]
current_home_team_formation = None
current_away_team_formation = None
for idx, event_elm in enumerate(events_list):
next_event_elm = (
events_list[idx + 1]
Expand All @@ -708,16 +711,39 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset:
type_id = int(event_elm.attrib["type_id"])
timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
period_id = int(event_elm.attrib["period_id"])
raw_qualifiers = {
int(
qualifier_elm.attrib["qualifier_id"]
): qualifier_elm.attrib.get("value")
for qualifier_elm in event_elm.iterchildren("Q")
}
for period in periods:
if period.id == period_id:
if (
period.id == period_id
or type_id == EVENT_TYPE_TEAM_SET_UP
):
break
else:
logger.debug(
f"Skipping event {event_id} because period doesn't match {period_id}"
)
continue

if type_id == EVENT_TYPE_START_PERIOD:
if type_id == EVENT_TYPE_TEAM_SET_UP:
if event_elm.attrib["team_id"] == home_team.team_id:
current_home_team_formation = formations[
int(raw_qualifiers[EVENT_QUALIFIER_TEAM_FORMATION])
]
elif event_elm.attrib["team_id"] == away_team.team_id:
current_away_team_formation = formations[
int(raw_qualifiers[EVENT_QUALIFIER_TEAM_FORMATION])
]
else:
raise DeserializationError(
f"Unknown team_id {event_elm.attrib['team_id']}"
)
continue
elif type_id == EVENT_TYPE_START_PERIOD:
logger.debug(
f"Set start of period {period.id} to {timestamp}"
)
Expand All @@ -734,8 +760,16 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset:

if event_elm.attrib["team_id"] == home_team.team_id:
team = teams[0]
current_formation = current_home_team_formation
current_opponent_formation = (
current_away_team_formation
)
elif event_elm.attrib["team_id"] == away_team.team_id:
team = teams[1]
current_formation = current_away_team_formation
current_opponent_formation = (
current_home_team_formation
)
else:
raise DeserializationError(
f"Unknown team_id {event_elm.attrib['team_id']}"
Expand All @@ -744,12 +778,6 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset:
x = float(event_elm.attrib["x"])
y = float(event_elm.attrib["y"])
outcome = int(event_elm.attrib["outcome"])
raw_qualifiers = {
int(
qualifier_elm.attrib["qualifier_id"]
): qualifier_elm.attrib.get("value")
for qualifier_elm in event_elm.iterchildren("Q")
}
player = None
if "player_id" in event_elm.attrib:
player = team.get_player_by_id(
Expand All @@ -771,6 +799,8 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset:
player=player,
coordinates=Point(x=x, y=y),
raw_event=event_elm,
formation=current_formation,
opponent_formation=current_opponent_formation,
)

if type_id == EVENT_TYPE_PASS:
Expand Down Expand Up @@ -891,6 +921,27 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset:
**formation_change_event_kwargs,
**generic_event_kwargs,
)
if event_elm.attrib["team_id"] == home_team.team_id:
current_home_team_formation = formations[
int(
raw_qualifiers[
EVENT_QUALIFIER_TEAM_FORMATION
]
)
]
elif event_elm.attrib["team_id"] == away_team.team_id:
current_away_team_formation = formations[
int(
raw_qualifiers[
EVENT_QUALIFIER_TEAM_FORMATION
]
)
]
else:
raise DeserializationError(
f"Unknown team_id {event_elm.attrib['team_id']}"
)

elif type_id == EVENT_TYPE_CARD:
generic_event_kwargs["ball_state"] = BallState.DEAD
card_event_kwargs = _parse_card(raw_qualifiers)
Expand Down
4 changes: 4 additions & 0 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,10 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
+ (raw_event["minute"] * 60)
- (60 * 45)
),
"formation": raw_event["team"]["formation"],
"opponent_formation": raw_event["OpponentTeam"][
"formation"
],
}

primary_event_type = raw_event["type"]["primary"]
Expand Down
14 changes: 13 additions & 1 deletion kloppy/tests/test_opta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
Point3D,
)

from kloppy.domain.models.event import EventType
from kloppy.domain.models.event import EventType, FormationType

from kloppy import opta

Expand Down Expand Up @@ -163,6 +163,18 @@ def test_correct_deserialization(self, f7_data: str, f24_data: str):
== DuelType.GROUND
)

# Check event formations
assert (
dataset.events[5].formation == FormationType.FOUR_FOUR_TWO
and dataset.events[5].opponent_formation
== FormationType.FOUR_THREE_THREE
)
assert (
dataset.events[6].formation == FormationType.FOUR_THREE_THREE
and dataset.events[6].opponent_formation
== FormationType.FOUR_FOUR_TWO
)

def test_shot(self, f7_data: str, f24_data: str):
dataset = opta.load(
f24_data=f24_data,
Expand Down

0 comments on commit 581afcc

Please sign in to comment.