From c7c8b6a0a053ebe8e2ac9797690f0d0d2cf85f29 Mon Sep 17 00:00:00 2001 From: Joris Bekkers Date: Thu, 11 Jan 2024 09:15:35 +0100 Subject: [PATCH 1/9] redo of PR215, includes updated jsonl loader without with open(), Dries' minor change to IDs and updated event_factory --- kloppy/domain/services/event_factory.py | 4 ++ .../serializers/event/sportec/deserializer.py | 5 ++- .../event/statsbomb/deserializer.py | 2 +- .../infra/serializers/tracking/skillcorner.py | 38 ++++++++++++++----- kloppy/tests/test_skillcorner.py | 3 ++ 5 files changed, 40 insertions(+), 12 deletions(-) diff --git a/kloppy/domain/services/event_factory.py b/kloppy/domain/services/event_factory.py index 33fe4f61..fbbfaa69 100644 --- a/kloppy/domain/services/event_factory.py +++ b/kloppy/domain/services/event_factory.py @@ -41,6 +41,10 @@ def create_event(event_cls: Type[T], **kwargs) -> T: but we filter those arguments out. """ extra_kwargs = {"state": {}} + + if "qualifiers" not in kwargs: + kwargs["qualifiers"] = None + if "related_event_ids" not in kwargs: extra_kwargs["related_event_ids"] = [] diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index fa29f753..be0b8104 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -112,7 +112,10 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: if not away_team: raise DeserializationError("Away team is missing from metadata") - (home_score, away_score,) = match_root.MatchInformation.General.attrib[ + ( + home_score, + away_score, + ) = match_root.MatchInformation.General.attrib[ "Result" ].split(":") score = Score(home=int(home_score), away=int(away_score)) diff --git a/kloppy/infra/serializers/event/statsbomb/deserializer.py b/kloppy/infra/serializers/event/statsbomb/deserializer.py index 419e96ac..920774a0 100644 --- a/kloppy/infra/serializers/event/statsbomb/deserializer.py +++ b/kloppy/infra/serializers/event/statsbomb/deserializer.py @@ -228,7 +228,7 @@ def create_periods(self, raw_events): ::2 ] # recorded for each team, take every other periods = [] - for (start_event, end_event) in zip_longest( + for start_event, end_event in zip_longest( half_start_and_end_events[::2], half_start_and_end_events[1::2] ): if ( diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index ebdbe1e8..a255e501 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -4,6 +4,7 @@ from collections import Counter import numpy as np import json +from pathlib import Path from kloppy.domain import ( attacking_direction_from_frame, @@ -136,7 +137,7 @@ def _get_frame_data( return Frame( frame_id=frame_id, - timestamp=frame_time, + timestamp=frame_time - periods[frame_period].start_timestamp, ball_coordinates=ball_coordinates, players_data=players_data, period=periods[frame_period], @@ -147,8 +148,16 @@ def _get_frame_data( @classmethod def _timestamp_from_timestring(cls, timestring): - m, s = timestring.split(":") - return 60 * float(m) + float(s) + parts = timestring.split(":") + + if len(parts) == 2: + m, s = parts + return 60 * float(m) + float(s) + elif len(parts) == 3: + h, m, s = parts + return 3600 * float(h) + 60 * float(m) + float(s) + else: + raise ValueError("Invalid timestring format") @classmethod def _set_skillcorner_attacking_directions(cls, frames, periods): @@ -182,7 +191,17 @@ def _set_skillcorner_attacking_directions(cls, frames, periods): ].attacking_direction = AttackingDirection.NOT_SET def __load_json(self, file): - return json.load(file) + if Path(file.name).suffix == ".jsonl": + data = [] + for line in file: + obj = json.loads(line) + # for each line rename timestamp to time to make it compatible with existing loader + if "timestamp" in obj: + obj["time"] = obj.pop("timestamp") + data.append(obj) + return data + else: + return json.load(file) @classmethod def __get_periods(cls, tracking): @@ -261,7 +280,7 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: metadata["away_team"].get("id"): "away_team", } - player_id_to_team_dict = { + player_to_team_dict = { player["trackable_object"]: player["team_id"] for player in metadata["players"] } @@ -302,8 +321,7 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: ) teams = [home_team, away_team] - for player_id in player_dict.keys(): - player = player_dict.get(player_id) + for player_track_obj_id, player in player_dict.items(): team_id = player["team_id"] if team_id == home_team_id: @@ -313,8 +331,8 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: team_string = "AWAY" team = away_team - players[team_string][player_id] = Player( - player_id=f"{team.ground}_{player['number']}", + players[team_string][player_track_obj_id] = Player( + player_id=f"{player['id']}", team=team, jersey_no=player["number"], name=f"{player['first_name']} {player['last_name']}", @@ -359,7 +377,7 @@ def _iter(): teams, teamdict, players, - player_id_to_team_dict, + player_to_team_dict, periods, player_dict, anon_players, diff --git a/kloppy/tests/test_skillcorner.py b/kloppy/tests/test_skillcorner.py index 67741ad4..0688f033 100644 --- a/kloppy/tests/test_skillcorner.py +++ b/kloppy/tests/test_skillcorner.py @@ -50,6 +50,9 @@ def test_correct_deserialization(self, raw_data: Path, meta_data: Path): # are frames with wrong camera views and pregame skipped? assert dataset.records[0].timestamp == 11.2 + # make sure skillcorner ID is used as player ID + assert dataset.metadata.teams[0].players[0].player_id == "10247" + # make sure data is loaded correctly home_player = dataset.metadata.teams[0].players[2] assert dataset.records[0].players_data[ From 8f2b20ca9341f7979b4c51d429602462d2960f1e Mon Sep 17 00:00:00 2001 From: driesdeprest Date: Fri, 12 Jan 2024 09:08:00 +0100 Subject: [PATCH 2/9] Fix Wyscout orientation --- kloppy/infra/serializers/event/wyscout/deserializer_v2.py | 2 +- kloppy/infra/serializers/event/wyscout/deserializer_v3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v2.py b/kloppy/infra/serializers/event/wyscout/deserializer_v2.py index 26cfb1fe..2ef37b64 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v2.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v2.py @@ -712,7 +712,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, score=None, frame_rate=None, - orientation=Orientation.BALL_OWNING_TEAM, + orientation=Orientation.ACTION_EXECUTING_TEAM, flags=None, provider=Provider.WYSCOUT, coordinate_system=transformer.get_to_coordinate_system(), diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 54bfb1fb..a19ce11a 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -684,7 +684,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, score=None, frame_rate=None, - orientation=Orientation.BALL_OWNING_TEAM, + orientation=Orientation.ACTION_EXECUTING_TEAM, flags=None, provider=Provider.WYSCOUT, coordinate_system=transformer.get_to_coordinate_system(), From 93d508a85dfd2d3509b900ad5ac312392d91b3a2 Mon Sep 17 00:00:00 2001 From: driesdeprest Date: Fri, 12 Jan 2024 09:20:41 +0100 Subject: [PATCH 3/9] Adding test which checks orientation of Wyscout generated EventDataset --- kloppy/tests/test_wyscout.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kloppy/tests/test_wyscout.py b/kloppy/tests/test_wyscout.py index c1d0f42b..3cd6b516 100644 --- a/kloppy/tests/test_wyscout.py +++ b/kloppy/tests/test_wyscout.py @@ -16,6 +16,7 @@ GoalkeeperActionType, CardQualifier, CardType, + Orientation, ) from kloppy import wyscout @@ -52,6 +53,9 @@ def dataset(self, event_v2_data) -> EventDataset: data_version="V2", ) assert dataset.dataset_type == DatasetType.EVENT + assert ( + dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM + ) return dataset def test_shot_event(self, dataset: EventDataset): @@ -130,6 +134,9 @@ def dataset(self, event_v3_data: Path) -> EventDataset: data_version="V3", ) assert dataset.dataset_type == DatasetType.EVENT + assert ( + dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM + ) return dataset def test_coordinates(self, dataset: EventDataset): From 8787f9c296057d14c9537a9150dc1ce4abfaf2ec Mon Sep 17 00:00:00 2001 From: JanVanHaaren Date: Sun, 14 Jan 2024 14:50:43 +0100 Subject: [PATCH 4/9] Undo unrelated changes --- kloppy/domain/services/event_factory.py | 4 ---- kloppy/infra/serializers/event/sportec/deserializer.py | 5 +---- kloppy/infra/serializers/event/statsbomb/deserializer.py | 2 +- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/kloppy/domain/services/event_factory.py b/kloppy/domain/services/event_factory.py index fbbfaa69..33fe4f61 100644 --- a/kloppy/domain/services/event_factory.py +++ b/kloppy/domain/services/event_factory.py @@ -41,10 +41,6 @@ def create_event(event_cls: Type[T], **kwargs) -> T: but we filter those arguments out. """ extra_kwargs = {"state": {}} - - if "qualifiers" not in kwargs: - kwargs["qualifiers"] = None - if "related_event_ids" not in kwargs: extra_kwargs["related_event_ids"] = [] diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index be0b8104..fa29f753 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -112,10 +112,7 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: if not away_team: raise DeserializationError("Away team is missing from metadata") - ( - home_score, - away_score, - ) = match_root.MatchInformation.General.attrib[ + (home_score, away_score,) = match_root.MatchInformation.General.attrib[ "Result" ].split(":") score = Score(home=int(home_score), away=int(away_score)) diff --git a/kloppy/infra/serializers/event/statsbomb/deserializer.py b/kloppy/infra/serializers/event/statsbomb/deserializer.py index 920774a0..419e96ac 100644 --- a/kloppy/infra/serializers/event/statsbomb/deserializer.py +++ b/kloppy/infra/serializers/event/statsbomb/deserializer.py @@ -228,7 +228,7 @@ def create_periods(self, raw_events): ::2 ] # recorded for each team, take every other periods = [] - for start_event, end_event in zip_longest( + for (start_event, end_event) in zip_longest( half_start_and_end_events[::2], half_start_and_end_events[1::2] ): if ( From 25045aa512c2d38dfca1217025f6cd5b9fcfd5ee Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 20 Jan 2024 12:28:08 +0100 Subject: [PATCH 5/9] refactor(tests): Refactor Opta tests --- kloppy/tests/test_opta.py | 467 ++++++++++++++++++++++++++------------ 1 file changed, 320 insertions(+), 147 deletions(-) diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 59261a13..684fa2e9 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -1,197 +1,302 @@ import math +from datetime import datetime, timezone import pytest from kloppy.domain import ( AttackingDirection, - Period, - Orientation, - Provider, - Ground, - Point, + BallState, BodyPart, - SetPieceType, - PassType, - DatasetType, + BodyPartQualifier, + BodyPartQualifier, + CardQualifier, CardType, - FormationType, - GoalkeeperQualifier, - GoalkeeperActionType, + CounterAttackQualifier, + DatasetFlag, + DatasetType, + Dimension, DuelQualifier, DuelType, - ShotResult, - SetPieceQualifier, - CounterAttackQualifier, - BodyPartQualifier, - Point, - Point3D, -) - -from kloppy.domain.models.event import ( + EventDataset, EventType, + FormationType, + GoalkeeperActionType, + GoalkeeperQualifier, + Orientation, PassQualifier, - BodyPartQualifier, + PassType, + PitchDimensions, + Point, + Point, + Point3D, + Position, + Provider, + Score, + SetPieceQualifier, + SetPieceType, + ShotResult, + build_coordinate_system, ) - from kloppy import opta from kloppy.infra.serializers.event.opta.deserializer import ( _get_end_coordinates, + _parse_f24_datetime, ) -class TestOpta: - """""" +@pytest.fixture(scope="module") +def dataset(base_dir) -> EventDataset: + """Load Opta data for FC København - FC Nordsjælland""" + dataset = opta.load( + f7_data=base_dir / "files" / "opta_f7.xml", + f24_data=base_dir / "files" / "opta_f24.xml", + coordinates="opta", + ) + assert dataset.dataset_type == DatasetType.EVENT + return dataset - @pytest.fixture - def f24_data(self, base_dir) -> str: - return base_dir / "files/opta_f24.xml" - @pytest.fixture - def f7_data(self, base_dir) -> str: - return base_dir / "files/opta_f7.xml" +@pytest.mark.xfail +def test_parse_f24_datetime(): + """Test if the F24 datetime is correctly parsed""" + # timestamps have millisecond precision + assert ( + _parse_f24_datetime("2018-09-23T15:02:13.608") + == datetime( + 2018, 9, 23, 15, 2, 13, 608000, tzinfo=timezone.utc + ).timestamp() + ) + # milliseconds are not left-padded + assert ( + _parse_f24_datetime("2018-09-23T15:02:14.39") + == datetime( + 2018, 9, 23, 15, 2, 14, 39000, tzinfo=timezone.utc + ).timestamp() + ) - def test_correct_deserialization(self, f7_data: str, f24_data: str): - dataset = opta.load( - f24_data=f24_data, f7_data=f7_data, coordinates="opta" - ) + +class TestOptaMetadata: + """Tests related to deserializing metadata (i.e., the F7 feed)""" + + def test_provider(self, dataset): + """It should set the Opta provider""" assert dataset.metadata.provider == Provider.OPTA - assert dataset.dataset_type == DatasetType.EVENT - assert len(dataset.events) == 33 - assert len(dataset.metadata.periods) == 5 - assert ( - dataset.events[10].ball_owning_team == dataset.metadata.teams[1] - ) # 1594254267 - assert ( - dataset.events[15].ball_owning_team == dataset.metadata.teams[0] - ) # 2087733359 + + def test_orientation(self, dataset): + """It should set the action-executing-team orientation""" assert ( dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM ) + + def test_framerate(self, dataset): + """It should set the frame rate to None""" + assert dataset.metadata.frame_rate is None + + def test_teams(self, dataset): + """It should create the teams and player objects""" + # There should be two teams with the correct names and starting formations assert dataset.metadata.teams[0].name == "FC København" - assert dataset.metadata.teams[0].ground == Ground.HOME assert dataset.metadata.teams[0].starting_formation == FormationType( "4-4-2" ) assert dataset.metadata.teams[1].name == "FC Nordsjælland" - assert dataset.metadata.teams[1].ground == Ground.AWAY assert dataset.metadata.teams[1].starting_formation == FormationType( "4-3-3" ) - - player = dataset.metadata.teams[0].players[0] + # The teams should have the correct players + player = dataset.metadata.teams[0].get_player_by_id("111319") assert player.player_id == "111319" assert player.jersey_no == 21 assert str(player) == "Jesse Joronen" - assert player.position.position_id == "1" - assert player.position.name == "Goalkeeper" - - assert dataset.metadata.periods[0] == Period( - id=1, - start_timestamp=1537714933.608, - end_timestamp=1537717701.222, - attacking_direction=AttackingDirection.NOT_SET, + + def test_player_position(self, dataset): + """It should set the correct player position from the events""" + # Starting players have a position + player = dataset.metadata.teams[0].get_player_by_id("111319") + assert player.position == Position( + position_id="1", name="Goalkeeper", coordinates=None + ) + assert player.starting + + # Substituted players have a "Substitute" position + sub_player = dataset.metadata.teams[0].get_player_by_id("88022") + assert sub_player.position == Position( + position_id="0", name="Substitute", coordinates=None ) - assert dataset.metadata.periods[1] == Period( - id=2, - start_timestamp=1537718728.873, - end_timestamp=1537721737.788, - attacking_direction=AttackingDirection.NOT_SET, + assert not sub_player.starting + + def test_periods(self, dataset): + """It should create the periods""" + assert len(dataset.metadata.periods) == 5 + assert dataset.metadata.periods[0].id == 1 + period_starts = [ + _parse_f24_datetime("2018-09-23T15:02:13.608"), + _parse_f24_datetime("2018-09-23T16:05:28.873"), + _parse_f24_datetime("2018-09-23T17:50:01.810"), + _parse_f24_datetime("2018-09-23T18:35:01.810"), + _parse_f24_datetime("2018-09-23T19:05:01.810"), + ] + period_ends = [ + _parse_f24_datetime("2018-09-23T15:48:21.222"), + _parse_f24_datetime("2018-09-23T16:55:37.788"), + _parse_f24_datetime("2018-09-23T18:20:01.810"), + _parse_f24_datetime("2018-09-23T18:50:01.810"), + _parse_f24_datetime("2018-09-23T19:25:01.810"), + ] + for i, period in enumerate(dataset.metadata.periods): + assert period.id == i + 1 + assert period.start_timestamp == period_starts[i] + assert period.end_timestamp == period_ends[i] + assert period.attacking_direction == AttackingDirection.NOT_SET + + def test_pitch_dimensions(self, dataset): + """It should set the correct pitch dimensions""" + assert dataset.metadata.pitch_dimensions == PitchDimensions( + x_dim=Dimension(0, 100), y_dim=Dimension(0, 100) ) - assert dataset.metadata.periods[4] == Period( - id=5, - start_timestamp=1537729501.81, - end_timestamp=1537730701.81, - attacking_direction=AttackingDirection.NOT_SET, + + def test_coordinate_system(self, dataset): + """It should set the correct coordinate system""" + assert dataset.metadata.coordinate_system == build_coordinate_system( + Provider.OPTA, width=100, length=100 ) - assert dataset.events[0].coordinates == Point(50.1, 49.4) + @pytest.mark.xfail + def test_score(self, dataset): + """It should set the correct score""" + assert dataset.metadata.score == Score(home=2, away=1) - # Check the qualifiers - assert ( - dataset.events[0].qualifiers[0].value == SetPieceType.KICK_OFF - ) # 1510681159 - assert ( - BodyPartQualifier(value=BodyPart.HEAD) - in dataset.events[6].qualifiers - ) # 1101592119 - assert ( - PassQualifier(value=PassType.THROUGH_BALL) - in dataset.events[6].qualifiers - ) # 1101592119 + @pytest.mark.xfail + def test_flags(self, dataset): + """It should set the correct flags""" assert ( - dataset.events[5].qualifiers[0].value == PassType.CHIPPED_PASS - ) # 1444075194 - assert ( - dataset.events[19].qualifiers[0].value == CardType.RED - ) # 2318695229 - assert ( - dataset.events[21].event_type == EventType.CLEARANCE - ) # 2498907287 + dataset.metadata.flags + == DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE + ) - # Check receiver coordinates for incomplete passes - assert dataset.events[6].receiver_coordinates.x == 45.5 - assert dataset.events[6].receiver_coordinates.y == 68.2 - # Check timestamp from qualifier in case of goal - assert dataset.events[17].timestamp == 139.65200018882751 # 2318695229 - # assert dataset.events[17].coordinates_y == 12 +class TestOptaEvent: + """Generic tests related to deserializing events (i.e., the F24 feed)""" - # Check Own goal - assert dataset.events[18].result.value == "OWN_GOAL" # 2318697001 - # Check OFFSIDE pass has end_coordinates - assert dataset.events[20].receiver_coordinates.x == 89.3 # 2360555167 + def test_generic_attributes(self, dataset: EventDataset): + """Test generic event attributes""" + event = dataset.get_event_by_id("1510681159") + assert event.event_id == "1510681159" + assert event.team.name == "FC København" + assert event.ball_owning_team.name == "FC København" + assert event.player.full_name == "Dame N'Doye" + assert event.coordinates == Point(50.1, 49.4) + assert event.raw_event.attrib["id"] == "1510681159" + assert event.related_event_ids == [] + assert event.period.id == 1 + assert event.timestamp == ( + _parse_f24_datetime("2018-09-23T15:02:14.39") # event timestamp + - _parse_f24_datetime("2018-09-23T15:02:13.608") # period start + ) + assert event.ball_state == BallState.ALIVE - # Check goalkeeper qualifiers - assert ( - dataset.events[23].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.SAVE + def test_correct_normalized_deserialization(self, base_dir): + """Test if the normalized deserialization is correct""" + dataset = opta.load( + f7_data=base_dir / "files" / "opta_f7.xml", + f24_data=base_dir / "files" / "opta_f24.xml", ) + event = dataset.get_event_by_id("1510681159") + assert event.coordinates == Point(0.501, 0.506) + + def test_ball_owning_team(self, dataset: EventDataset): + """Test if the ball owning team is correctly set""" assert ( - dataset.events[24].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.CLAIM + dataset.get_event_by_id("1594254267").ball_owning_team + == dataset.metadata.teams[1] ) assert ( - dataset.events[25].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.PUNCH + dataset.get_event_by_id("2087733359").ball_owning_team + == dataset.metadata.teams[0] ) + + def test_setpiece_qualifiers(self, dataset: EventDataset): + """Test if the qualifiers are correctly deserialized""" + kick_off = dataset.get_event_by_id("1510681159") assert ( - dataset.events[26].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.PICK_UP + kick_off.get_qualifier_value(SetPieceQualifier) + == SetPieceType.KICK_OFF ) + + def test_body_part_qualifiers(self, dataset: EventDataset): + """Test if the body part qualifiers are correctly deserialized""" + header = dataset.get_event_by_id("1101592119") + assert BodyPart.HEAD in header.get_qualifier_values(BodyPartQualifier) + + def test_card_qualifiers(self, dataset: EventDataset): + """Test if the card qualifiers are correctly deserialized""" + red_card = dataset.get_event_by_id("2318454729") + assert red_card.get_qualifier_value(CardQualifier) == CardType.RED + + def test_counter_attack_qualifiers(self, dataset: EventDataset): + """Test if the counter attack qualifiers are correctly deserialized""" + counter_attack = dataset.get_event_by_id("2318695229") assert ( - dataset.events[27].get_qualifier_value(GoalkeeperQualifier) - == GoalkeeperActionType.SMOTHER + counter_attack.get_qualifier_value(CounterAttackQualifier) is True ) - assert ( - dataset.events[28].event_type == EventType.INTERCEPTION - ) # 2609934569 - assert ( - dataset.events[29].event_type == EventType.MISCONTROL - ) # 250913217 - # Check counterattack - assert ( - CounterAttackQualifier(value=True) in dataset.events[17].qualifiers - ) # 2318695229 - # Check DuelQualifiers - assert DuelType.AERIAL in dataset.events[7].get_qualifier_values( - DuelQualifier +class TestOptaPassEvent: + """Tests related to deserialzing pass events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all clearance events""" + events = dataset.find_all("pass") + assert len(events) == 14 + + def test_receiver_coordinates(self, dataset: EventDataset): + """Test if the receiver coordinates are correctly deserialized""" + # Check receiver coordinates for incomplete passes + incomplete_pass = dataset.get_event_by_id("1101592119") + assert incomplete_pass.receiver_coordinates.x == 45.5 + assert incomplete_pass.receiver_coordinates.y == 68.2 + + def test_end_coordinates(self, dataset: EventDataset): + """Test if the end coordinates are correctly deserialized""" + pass_event = dataset.get_event_by_id("2360555167") + assert pass_event.receiver_coordinates.x == 89.3 + + def test_pass_qualifiers(self, dataset: EventDataset): + """Test if the pass type qualfiers are correctly deserialized""" + through_ball = dataset.get_event_by_id("1101592119") + assert PassType.THROUGH_BALL in through_ball.get_qualifier_values( + PassQualifier ) - assert ( - dataset.events[8].get_qualifier_values(DuelQualifier)[1] - == DuelType.GROUND + chipped_pass = dataset.get_event_by_id("1444075194") + assert PassType.CHIPPED_PASS in chipped_pass.get_qualifier_values( + PassQualifier ) - def test_shot(self, f7_data: str, f24_data: str): - dataset = opta.load( - f24_data=f24_data, - f7_data=f7_data, - event_types=["shot"], - coordinates="opta", - ) - assert len(dataset.events) == 3 +class TestOptaClearanceEvent: + """Tests related to deserialzing clearance events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all clearance events""" + events = dataset.find_all("clearance") + assert len(events) == 2 + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the clearance event is correctly deserialized""" + clearance = dataset.get_event_by_id("2498907287") + assert clearance.event_type == EventType.CLEARANCE + + +class TestOptaShotEvent: + """Tests related to deserialzing shot events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all shot events""" + events = dataset.find_all("shot") + assert len(events) == 3 + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the shot event is correctly deserialized""" shot = dataset.get_event_by_id("2318695229") # A shot event should have a result assert shot.result == ShotResult.GOAL @@ -202,6 +307,14 @@ def test_shot(self, f7_data: str, f24_data: str): shot.get_qualifier_value(BodyPartQualifier) == BodyPart.LEFT_FOOT ) + def test_timestamp_goal(self, dataset: EventDataset): + """Check timestamp from qualifier in case of goal""" + goal = dataset.get_event_by_id("2318695229") + assert goal.timestamp == ( + _parse_f24_datetime("2018-09-23T16:07:48.525") # event timestamp + - _parse_f24_datetime("2018-09-23T16:05:28.873") # period start + ) + def test_shot_end_coordinates(self): """Shots should receive the correct end coordinates.""" # When no end coordinates are available, we return None @@ -276,24 +389,84 @@ def test_shot_end_coordinates(self): blocked_shot_on_target_qualifiers, start_coordinates ) == Point3D(x=99.1, y=52.5, z=opp_small) - def test_own_goal(self, f7_data: str, f24_data: str): - dataset = opta.load( - f24_data=f24_data, - f7_data=f7_data, - event_types=["shot"], - coordinates="opta", - ) - + def test_own_goal(self, dataset: EventDataset): + """Test if own goals are correctly deserialized""" own_goal = dataset.get_event_by_id("2318697001") assert own_goal.result == ShotResult.OWN_GOAL # Use the inverse coordinates of the goal location assert own_goal.result_coordinates == Point3D(0.0, 100 - 45.6, 1.9) - def test_correct_normalized_deserialization( - self, f7_data: str, f24_data: str - ): - dataset = opta.load( - f24_data=f24_data, - f7_data=f7_data, + +class TestOptaDuelEvent: + """Tests related to deserialzing duel events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all duel events""" + events = dataset.find_all("duel") + assert len(events) == 3 + + def test_qualifiers(self, dataset: EventDataset): + """Test if the qualifiers are correctly deserialized""" + aerial_duel = dataset.get_event_by_id("1274474573") + assert DuelType.AERIAL in aerial_duel.get_qualifier_values( + DuelQualifier + ) + ground_duel = dataset.get_event_by_id("2140914735") + assert DuelType.GROUND in ground_duel.get_qualifier_values( + DuelQualifier + ) + + +class TestOptaGoalkeeperEvent: + """Tests related to deserialzing goalkeeper events""" + + def test_deserialize_all(self, dataset: EventDataset): + """It should deserialize all goalkeeper events""" + events = dataset.find_all("goalkeeper") + assert len(events) == 5 + + def test_qualifiers(self, dataset: EventDataset): + """Test if the qualifiers are correctly deserialized""" + save = dataset.get_event_by_id("2451170467") + assert ( + save.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.SAVE + ) + claim = dataset.get_event_by_id("2453149143") + assert ( + claim.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.CLAIM + ) + punch = dataset.get_event_by_id("2451094707") + assert ( + punch.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.PUNCH + ) + keeper_pick_up = dataset.get_event_by_id("2451098837") + assert ( + keeper_pick_up.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.PICK_UP + ) + smother = dataset.get_event_by_id("2438594253") + assert ( + smother.get_qualifier_value(GoalkeeperQualifier) + == GoalkeeperActionType.SMOTHER ) - assert dataset.events[0].coordinates == Point(0.501, 0.506) + + +class TestOptaInterceptionEvent: + """Tests related to deserialzing interception events""" + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the interception event is correctly deserialized""" + event = dataset.get_event_by_id("2609934569") + assert event.event_type == EventType.INTERCEPTION + + +class TestOptaMiscontrolEvent: + """Tests related to deserialzing miscontrol events""" + + def test_correct_deserialization(self, dataset: EventDataset): + """Test if the miscontrol event is correctly deserialized""" + event = dataset.get_event_by_id("2509132175") + assert event.event_type == EventType.MISCONTROL From e8ddb0c97002ace2827865d605c81cbfd95fdbf6 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 20 Jan 2024 12:35:53 +0100 Subject: [PATCH 6/9] fix(opta): Change dtype of score from str to int --- kloppy/infra/serializers/event/opta/deserializer.py | 4 ++-- kloppy/tests/test_opta.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/kloppy/infra/serializers/event/opta/deserializer.py b/kloppy/infra/serializers/event/opta/deserializer.py index 01a81e25..e9f0161e 100644 --- a/kloppy/infra/serializers/event/opta/deserializer.py +++ b/kloppy/infra/serializers/event/opta/deserializer.py @@ -705,10 +705,10 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset: away_score = None for team_elm in team_elms: if team_elm.attrib["Side"] == "Home": - home_score = team_elm.attrib["Score"] + home_score = int(team_elm.attrib["Score"]) home_team = _team_from_xml_elm(team_elm, f7_root) elif team_elm.attrib["Side"] == "Away": - away_score = team_elm.attrib["Score"] + away_score = int(team_elm.attrib["Score"]) away_team = _team_from_xml_elm(team_elm, f7_root) else: raise DeserializationError( diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 684fa2e9..fec29361 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -161,7 +161,6 @@ def test_coordinate_system(self, dataset): Provider.OPTA, width=100, length=100 ) - @pytest.mark.xfail def test_score(self, dataset): """It should set the correct score""" assert dataset.metadata.score == Score(home=2, away=1) From d5456e714bc6fb1720bbdb82c8b97fb19be440d8 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 20 Jan 2024 12:40:40 +0100 Subject: [PATCH 7/9] fix(opta): Add missing BALL_STATE flag --- kloppy/infra/serializers/event/opta/deserializer.py | 2 +- kloppy/tests/test_opta.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/kloppy/infra/serializers/event/opta/deserializer.py b/kloppy/infra/serializers/event/opta/deserializer.py index 01a81e25..b6bd9490 100644 --- a/kloppy/infra/serializers/event/opta/deserializer.py +++ b/kloppy/infra/serializers/event/opta/deserializer.py @@ -952,7 +952,7 @@ def deserialize(self, inputs: OptaInputs) -> EventDataset: score=score, frame_rate=None, orientation=Orientation.ACTION_EXECUTING_TEAM, - flags=DatasetFlag.BALL_OWNING_TEAM, + flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, provider=Provider.OPTA, coordinate_system=transformer.get_to_coordinate_system(), ) diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 684fa2e9..0b323e65 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -166,7 +166,6 @@ def test_score(self, dataset): """It should set the correct score""" assert dataset.metadata.score == Score(home=2, away=1) - @pytest.mark.xfail def test_flags(self, dataset): """It should set the correct flags""" assert ( From 25021d9fe8eb0a588a287d07695e5030a9c7d3f1 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 20 Jan 2024 12:53:36 +0100 Subject: [PATCH 8/9] fix(opta): Fix Opta F24 timestamp parsing Opta does not zero-pad milliseconds. Therefore, they were incorrectly parsed by Python's default "%f" format code. See also #267 --- kloppy/infra/serializers/event/opta/deserializer.py | 5 +++++ kloppy/tests/test_opta.py | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/kloppy/infra/serializers/event/opta/deserializer.py b/kloppy/infra/serializers/event/opta/deserializer.py index 01a81e25..e2bbce35 100644 --- a/kloppy/infra/serializers/event/opta/deserializer.py +++ b/kloppy/infra/serializers/event/opta/deserializer.py @@ -246,6 +246,11 @@ def _parse_f24_datetime(dt_str: str) -> float: + def zero_pad_milliseconds(timestamp): + parts = timestamp.split(".") + return ".".join(parts[:-1] + ["{:03d}".format(int(parts[-1]))]) + + dt_str = zero_pad_milliseconds(dt_str) return ( datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f") .replace(tzinfo=pytz.utc) diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 684fa2e9..f4be28a4 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -56,7 +56,6 @@ def dataset(base_dir) -> EventDataset: return dataset -@pytest.mark.xfail def test_parse_f24_datetime(): """Test if the F24 datetime is correctly parsed""" # timestamps have millisecond precision From b06133f7780a2a89a0bba85a0b6695fae4d56ec5 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sun, 14 Jan 2024 17:08:00 +0100 Subject: [PATCH 9/9] fix(metrica): Fix metadata after transform The pitch dimensions and coordinate system were not updated when deserializing metrica data with the non-default coordinate system. --- .../infra/serializers/event/metrica/json_deserializer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kloppy/infra/serializers/event/metrica/json_deserializer.py b/kloppy/infra/serializers/event/metrica/json_deserializer.py index 07665319..3d846837 100644 --- a/kloppy/infra/serializers/event/metrica/json_deserializer.py +++ b/kloppy/infra/serializers/event/metrica/json_deserializer.py @@ -1,6 +1,7 @@ -from typing import Dict, List, NamedTuple, IO, Optional import logging import json +from dataclasses import replace +from typing import Dict, List, NamedTuple, IO, Optional from kloppy.domain import ( BallState, @@ -384,6 +385,10 @@ def deserialize(self, inputs: MetricaJsonEventDataInputs) -> EventDataset: events.append(transformer.transform_event(event)) return EventDataset( - metadata=metadata, + metadata=replace( + metadata, + pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, + coordinate_system=transformer.get_to_coordinate_system(), + ), records=events, )