From e359a991c5da675a95d827ce534330e5669f02cb Mon Sep 17 00:00:00 2001 From: Dries Deprest Date: Tue, 17 Dec 2024 18:00:39 +0100 Subject: [PATCH 1/4] fix(Stats Perform): Ignore 19/"Player on" events as they are already incorporated in SubstitutionEvent (#361) --- kloppy/infra/serializers/event/statsperform/deserializer.py | 2 ++ kloppy/tests/issues/issue_60/test_issue_60.py | 4 ++-- kloppy/tests/test_adapter.py | 2 +- kloppy/tests/test_statsperform.py | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kloppy/infra/serializers/event/statsperform/deserializer.py b/kloppy/infra/serializers/event/statsperform/deserializer.py index f603717a..5ec3dac0 100644 --- a/kloppy/infra/serializers/event/statsperform/deserializer.py +++ b/kloppy/infra/serializers/event/statsperform/deserializer.py @@ -724,6 +724,8 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset: f"Set end of period {period.id} to {raw_event.timestamp}" ) period.end_timestamp = raw_event.timestamp + elif raw_event.type_id == EVENT_TYPE_PLAYER_ON: + continue else: if not period.start_timestamp: # not started yet diff --git a/kloppy/tests/issues/issue_60/test_issue_60.py b/kloppy/tests/issues/issue_60/test_issue_60.py index 5687973e..4d14b972 100644 --- a/kloppy/tests/issues/issue_60/test_issue_60.py +++ b/kloppy/tests/issues/issue_60/test_issue_60.py @@ -16,7 +16,7 @@ def test_deleted_event_opta(self): assert deleted_event_id not in df["event_id"].to_list() # OPTA F24 file: Pass -> Deleted Event -> Tackle - assert event_dataset.events[16].event_name == "pass" + assert event_dataset.events[15].event_name == "pass" assert ( - event_dataset.events[17].event_name == "duel" + event_dataset.events[16].event_name == "duel" ) # Deleted Event is filter out diff --git a/kloppy/tests/test_adapter.py b/kloppy/tests/test_adapter.py index e6ff29c7..10409958 100644 --- a/kloppy/tests/test_adapter.py +++ b/kloppy/tests/test_adapter.py @@ -57,4 +57,4 @@ def read_to_stream(self, url: str, output: BinaryIO): # Asserts borrowed from `test_opta.py` assert dataset.metadata.provider == Provider.OPTA assert dataset.dataset_type == DatasetType.EVENT - assert len(dataset.events) == 40 + assert len(dataset.events) == 39 diff --git a/kloppy/tests/test_statsperform.py b/kloppy/tests/test_statsperform.py index f1c772d1..9e8e9070 100644 --- a/kloppy/tests/test_statsperform.py +++ b/kloppy/tests/test_statsperform.py @@ -177,7 +177,7 @@ def test_deserialize_all(self, event_dataset: EventDataset): pitch_length=None, pitch_width=None, ) - assert len(event_dataset.records) == 1652 + assert len(event_dataset.records) == 1643 substitution_events = event_dataset.find_all("substitution") assert len(substitution_events) == 9 From 45ab84c668f1f0947a97e776971569cb94e2661a Mon Sep 17 00:00:00 2001 From: UnravelSports <64530306+UnravelSports@users.noreply.github.com> Date: Tue, 17 Dec 2024 20:07:57 +0100 Subject: [PATCH 2/4] feat(sportec): add referees to metadata; fix(sportec): parsing tracking data with referee --------- Co-authored-by: UnravelSports [JB] Co-authored-by: Pieter Robberechts --- kloppy/domain/models/common.py | 43 +- .../serializers/event/sportec/deserializer.py | 38 + .../tracking/sportec/deserializer.py | 10 + .../files/sportec_positional_w_referee.xml | 671 ++++++++++++++++++ kloppy/tests/test_sportec.py | 56 ++ kloppy/utils.py | 5 + 6 files changed, 822 insertions(+), 1 deletion(-) create mode 100644 kloppy/tests/files/sportec_positional_w_referee.xml diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index c1830d1b..b4880451 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -20,7 +20,7 @@ from .position import PositionType -from ...utils import deprecated +from ...utils import deprecated, snake_case if sys.version_info >= (3, 8): from typing import Literal @@ -119,6 +119,46 @@ def __str__(self): return self.value +class OfficialType(Enum): + """Enumeration for types of officials (referees).""" + + VideoAssistantReferee = "Video Assistant Referee" + MainReferee = "Main Referee" + AssistantReferee = "Assistant Referee" + FourthOfficial = "Fourth Official" + + def __str__(self): + return self.value + + +@dataclass(frozen=True) +class Official: + """ + Represents an official (referee) with optional names and roles. + """ + + official_id: str + name: Optional[str] = None + first_name: Optional[str] = None + last_name: Optional[str] = None + role: Optional[OfficialType] = None + + @property + def full_name(self): + """ + Returns the full name of the official, falling back to role-based or ID-based naming. + """ + if self.name: + return self.name + if self.first_name and self.last_name: + return f"{self.first_name} {self.last_name}" + if self.last_name: + return self.last_name + if self.role: + return f"{snake_case(str(self.role))}_{self.official_id}" + return f"official_{self.official_id}" + + @dataclass(frozen=True) class Player: """ @@ -1016,6 +1056,7 @@ class Metadata: game_id: Optional[str] = None home_coach: Optional[str] = None away_coach: Optional[str] = None + officials: Optional[List] = field(default_factory=list) attributes: Optional[Dict] = field(default_factory=dict, compare=False) def __post_init__(self): diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 14895206..57d105a4 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -29,6 +29,8 @@ CardType, AttackingDirection, PositionType, + Official, + OfficialType, ) from kloppy.exceptions import DeserializationError from kloppy.infra.serializers.event.deserializer import EventDataDeserializer @@ -55,6 +57,14 @@ "LA": PositionType.LeftWing, } +referee_types_mapping: Dict[str, OfficialType] = { + "referee": OfficialType.MainReferee, + "firstAssistant": OfficialType.AssistantReferee, + "videoReferee": OfficialType.VideoAssistantReferee, + "secondAssistant": OfficialType.AssistantReferee, + "fourthOfficial": OfficialType.FourthOfficial, +} + logger = logging.getLogger(__name__) @@ -102,6 +112,7 @@ class SportecMetadata(NamedTuple): fps: int home_coach: str away_coach: str + officials: List[Official] def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: @@ -213,6 +224,31 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: ] ) + if hasattr(match_root, "MatchInformation") and hasattr( + match_root.MatchInformation, "Referees" + ): + officials = [] + referee_path = objectify.ObjectPath( + "PutDataRequest.MatchInformation.Referees" + ) + referee_elms = referee_path.find(match_root).iterchildren( + tag="Referee" + ) + + for referee in referee_elms: + ref_attrib = referee.attrib + officials.append( + Official( + official_id=ref_attrib["PersonId"], + name=ref_attrib["Shortname"], + first_name=ref_attrib["FirstName"], + last_name=ref_attrib["LastName"], + role=referee_types_mapping[ref_attrib["Role"]], + ) + ) + else: + officials = [] + return SportecMetadata( score=score, teams=teams, @@ -222,6 +258,7 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: fps=SPORTEC_FPS, home_coach=home_coach, away_coach=away_coach, + officials=officials, ) @@ -673,6 +710,7 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: game_id=game_id, home_coach=home_coach, away_coach=away_coach, + officials=sportec_metadata.officials, ) return EventDataset( diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index 3f418375..7cc08516 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -122,6 +122,7 @@ def deserialize( with performance_logging("parse metadata", logger=logger): sportec_metadata = sportec_metadata_from_xml_elm(match_root) teams = home_team, away_team = sportec_metadata.teams + periods = sportec_metadata.periods transformer = self.get_transformer( pitch_length=sportec_metadata.x_max, @@ -130,6 +131,12 @@ def deserialize( home_coach = sportec_metadata.home_coach away_coach = sportec_metadata.away_coach + official_ids = [] + if sportec_metadata.officials: + official_ids = [ + x.official_id for x in sportec_metadata.officials + ] + with performance_logging("parse raw data", logger=logger): date = parse( match_root.MatchInformation.General.attrib["KickoffTime"] @@ -156,6 +163,7 @@ def _iter(): for i, (frame_id, frame_data) in enumerate( sorted(raw_frames.items()) ): + if "ball" not in frame_data: # Frames without ball data are corrupt. continue @@ -193,6 +201,7 @@ def _iter(): ) for player_id, raw_player_data in frame_data.items() if player_id != "ball" + and player_id not in official_ids }, other_data={}, ball_coordinates=Point3D( @@ -242,6 +251,7 @@ def _iter(): game_id=game_id, home_coach=home_coach, away_coach=away_coach, + officials=sportec_metadata.officials, ) return TrackingDataset( diff --git a/kloppy/tests/files/sportec_positional_w_referee.xml b/kloppy/tests/files/sportec_positional_w_referee.xml new file mode 100644 index 00000000..d9f12d8f --- /dev/null +++ b/kloppy/tests/files/sportec_positional_w_referee.xml @@ -0,0 +1,671 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index 1c11bb78..ac8ad2de 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -16,6 +16,8 @@ BallState, Point3D, PositionType, + OfficialType, + Official, ) from kloppy import sportec @@ -119,6 +121,10 @@ class TestSportecTrackingData: def raw_data(self, base_dir) -> str: return base_dir / "files/sportec_positional.xml" + @pytest.fixture + def raw_data_referee(self, base_dir) -> str: + return base_dir / "files/sportec_positional_w_referee.xml" + @pytest.fixture def meta_data(self, base_dir) -> str: return base_dir / "files/sportec_meta.xml" @@ -145,6 +151,7 @@ def test_load_metadata(self, raw_data: Path, meta_data: Path): assert dataset.metadata.periods[1].end_timestamp == timedelta( seconds=4000 + 2996.68 ) + assert len(dataset.metadata.officials) == 4 def test_load_frames(self, raw_data: Path, meta_data: Path): dataset = sportec.load_tracking( @@ -238,3 +245,52 @@ def test_enriched_metadata(self, raw_data: Path, meta_data: Path): if away_coach: assert isinstance(away_coach, str) assert away_coach == "M. Rose" + + def test_referees(self, raw_data_referee: Path, meta_data: Path): + dataset = sportec.load_tracking( + raw_data=raw_data_referee, + meta_data=meta_data, + coordinates="sportec", + only_alive=True, + ) + assert len(dataset.metadata.officials) == 4 + + assert ( + Official( + official_id="42", + name="Pierluigi Collina", + role=OfficialType.MainReferee, + ).role.value + == "Main Referee" + ) + + assert ( + Official( + official_id="42", + name="Pierluigi Collina", + role=OfficialType.MainReferee, + ).full_name + == "Pierluigi Collina" + ) + assert ( + Official( + official_id="42", + first_name="Pierluigi", + last_name="Collina", + role=OfficialType.MainReferee, + ).full_name + == "Pierluigi Collina" + ) + assert ( + Official( + official_id="42", + last_name="Collina", + role=OfficialType.MainReferee, + ).full_name + == "Collina" + ) + assert ( + Official(official_id="42", role=OfficialType.MainReferee).full_name + == "main_referee_42" + ) + assert Official(official_id="42").full_name == "official_42" diff --git a/kloppy/utils.py b/kloppy/utils.py index b0858398..68d36af2 100644 --- a/kloppy/utils.py +++ b/kloppy/utils.py @@ -169,3 +169,8 @@ def __get__(self, instance, owner): stacklevel=2, ) return self.value + + +def snake_case(s: str) -> str: + """Convert a string to snake_case.""" + return re.sub(r"[\s\-]+", "_", s.strip()).lower() From b0f56e126732b54bbcef98f5edcf42087b1e201d Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 17 Dec 2024 20:56:18 +0100 Subject: [PATCH 3/4] fix: common bug in parsing of UTC datetimes (#373) --- .../event/datafactory/deserializer.py | 14 +++++------ .../serializers/event/sportec/deserializer.py | 7 +++--- .../event/statsperform/deserializer.py | 18 +++++++++---- .../event/statsperform/parsers/base.py | 2 +- .../event/statsperform/parsers/f24_xml.py | 25 ++++++++++++------- .../event/statsperform/parsers/ma1_json.py | 15 +++++------ .../event/statsperform/parsers/ma1_xml.py | 7 +++--- .../event/statsperform/parsers/ma3_json.py | 7 +++--- .../event/statsperform/parsers/ma3_xml.py | 7 +++--- .../event/wyscout/deserializer_v3.py | 8 +++--- .../infra/serializers/tracking/skillcorner.py | 18 +++++++------ .../tracking/sportec/deserializer.py | 7 +++--- .../serializers/tracking/tracab/tracab_dat.py | 13 +++++----- kloppy/tests/test_opta.py | 6 ++--- setup.py | 1 - 15 files changed, 84 insertions(+), 71 deletions(-) diff --git a/kloppy/infra/serializers/event/datafactory/deserializer.py b/kloppy/infra/serializers/event/datafactory/deserializer.py index cf3d11eb..44f5df20 100644 --- a/kloppy/infra/serializers/event/datafactory/deserializer.py +++ b/kloppy/infra/serializers/event/datafactory/deserializer.py @@ -1,9 +1,8 @@ import json import logging -from datetime import timedelta, datetime, timezone -from dateutil.parser import parse, _parser from dataclasses import replace -from typing import Dict, List, Tuple, Union, IO, NamedTuple +from datetime import datetime, timedelta, timezone +from typing import IO, Dict, List, NamedTuple, Tuple, Union from kloppy.domain import ( AttackingDirection, @@ -41,7 +40,6 @@ from kloppy.infra.serializers.event.deserializer import EventDataDeserializer from kloppy.utils import Readable, performance_logging - logger = logging.getLogger(__name__) @@ -435,7 +433,7 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: + status_update["time"] + match["stadiumGMT"], "%Y%m%d%H:%M:%S%z", - ).astimezone(timezone.utc) + ) half = status_update["t"]["half"] if status_update["type"] == DF_EVENT_TYPE_STATUS_MATCH_START: half = 1 @@ -458,8 +456,10 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: date = match["date"] if date: # TODO: scheduledStart and stadiumGMT should probably be used here too - date = parse(date).astimezone(timezone.utc) - except _parser.ParserError: + date = datetime.strptime(date, "%Y%m%d").replace( + tzinfo=timezone.utc + ) + except ValueError: date = None game_week = match.get("week", None) if game_week: diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 57d105a4..b240db49 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -2,7 +2,6 @@ from typing import Dict, List, NamedTuple, IO from datetime import timedelta, datetime, timezone import logging -from dateutil.parser import parse from lxml import objectify from kloppy.domain import ( @@ -314,7 +313,7 @@ def _event_chain_from_xml_elm(event_elm): def _parse_datetime(dt_str: str) -> datetime: - return parse(dt_str).astimezone(timezone.utc) + return datetime.fromisoformat(dt_str) def _get_event_qualifiers(event_chain: Dict) -> List[Qualifier]: @@ -469,9 +468,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): - date = parse( + date = datetime.fromisoformat( match_root.MatchInformation.General.attrib["KickoffTime"] - ).astimezone(timezone.utc) + ) game_week = match_root.MatchInformation.General.attrib["MatchDay"] game_id = match_root.MatchInformation.General.attrib["MatchId"] diff --git a/kloppy/infra/serializers/event/statsperform/deserializer.py b/kloppy/infra/serializers/event/statsperform/deserializer.py index 5ec3dac0..95bf9e9c 100644 --- a/kloppy/infra/serializers/event/statsperform/deserializer.py +++ b/kloppy/infra/serializers/event/statsperform/deserializer.py @@ -1,9 +1,10 @@ -import pytz import math from typing import Dict, List, NamedTuple, IO, Optional import logging from datetime import datetime, timedelta +import pytz + from kloppy.domain import ( EventDataset, Team, @@ -795,11 +796,18 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset: ): if raw_event.type_id == EVENT_TYPE_SHOT_GOAL: if 374 in raw_event.qualifiers: + # Qualifier 374 specifies the actual time of the shot for all goal events + # It uses London timezone for both MA3 and F24 feeds + naive_datetime = datetime.strptime( + raw_event.qualifiers[374], + "%Y-%m-%d %H:%M:%S.%f", + ) + timezone = pytz.timezone("Europe/London") + aware_datetime = timezone.localize( + naive_datetime + ) generic_event_kwargs["timestamp"] = ( - datetime.strptime( - raw_event.qualifiers[374], - "%Y-%m-%d %H:%M:%S.%f", - ).replace(tzinfo=pytz.utc) + aware_datetime.astimezone(pytz.utc) - period.start_timestamp ) shot_event_kwargs = _parse_shot(raw_event) diff --git a/kloppy/infra/serializers/event/statsperform/parsers/base.py b/kloppy/infra/serializers/event/statsperform/parsers/base.py index 3fee98b9..2a7ca7cc 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/base.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/base.py @@ -61,7 +61,7 @@ def extract_score(self) -> Optional[Score]: """Return the score of the game.""" return None - def extract_date(self) -> Optional[str]: + def extract_date(self) -> Optional[datetime]: """Return the date of the game.""" return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py index f32dbd95..e8cb1ffb 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py @@ -1,10 +1,11 @@ """XML parser for Opta F24 feeds.""" -import pytz -from datetime import datetime, timezone + +from datetime import datetime from typing import List, Optional -from dateutil.parser import parse -from .base import OptaXMLParser, OptaEvent +import pytz + +from .base import OptaEvent, OptaXMLParser def _parse_f24_datetime(dt_str: str) -> datetime: @@ -15,9 +16,10 @@ def zero_pad_milliseconds(timestamp): return ".".join(parts[:-1] + ["{:03d}".format(int(parts[-1]))]) dt_str = zero_pad_milliseconds(dt_str) - return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace( - tzinfo=pytz.utc - ) + naive_datetime = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f") + timezone = pytz.timezone("Europe/London") + aware_datetime = timezone.localize(naive_datetime) + return aware_datetime.astimezone(pytz.utc) class F24XMLParser(OptaXMLParser): @@ -54,11 +56,16 @@ def extract_events(self) -> List[OptaEvent]: for event in game_elm.iterchildren("Event") ] - def extract_date(self) -> Optional[str]: + def extract_date(self) -> Optional[datetime]: """Return the date of the game.""" game_elm = self.root.find("Game") if game_elm and "game_date" in game_elm.attrib: - return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc) + naive_datetime = datetime.strptime( + game_elm.attrib["game_date"], "%Y-%m-%dT%H:%M:%S" + ) + timezone = pytz.timezone("Europe/London") + aware_datetime = timezone.localize(naive_datetime) + return aware_datetime.astimezone(pytz.utc) else: return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py index c9aa3974..8c1bf6e2 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py @@ -1,10 +1,11 @@ """JSON parser for Stats Perform MA1 feeds.""" -import pytz + from datetime import datetime, timezone -from typing import Any, Optional, List, Tuple, Dict +from typing import Any, Dict, List, Optional, Tuple -from kloppy.domain import Period, Score, Team, Ground, Player +from kloppy.domain import Ground, Period, Player, Score, Team from kloppy.exceptions import DeserializationError + from .base import OptaJSONParser @@ -30,12 +31,12 @@ def extract_periods(self) -> List[Period]: id=period["id"], start_timestamp=datetime.strptime( period_start_raw, "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc) + ).replace(tzinfo=timezone.utc) if period_start_raw else None, end_timestamp=datetime.strptime( period_end_raw, "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc) + ).replace(tzinfo=timezone.utc) if period_end_raw else None, ) @@ -95,12 +96,12 @@ def extract_lineups(self) -> Tuple[Team, Team]: raise DeserializationError("Lineup incomplete") return home_team, away_team - def extract_date(self) -> Optional[str]: + def extract_date(self) -> Optional[datetime]: """Return the date of the game.""" if "matchInfo" in self.root and "date" in self.root["matchInfo"]: return datetime.strptime( self.root["matchInfo"]["date"], "%Y-%m-%dZ" - ).astimezone(timezone.utc) + ).replace(tzinfo=timezone.utc) else: return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py index 5b7bda49..92058877 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py @@ -1,6 +1,5 @@ """XML parser for Stats Perform MA1 feeds.""" -import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Optional, List, Dict, Tuple from kloppy.domain import Period, Score, Team, Ground, Player @@ -22,10 +21,10 @@ def extract_periods(self) -> List[Period]: id=int(period.get("id")), start_timestamp=datetime.strptime( period.get("start"), "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc), + ).replace(tzinfo=timezone.utc), end_timestamp=datetime.strptime( period.get("end"), "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc), + ).replace(tzinfo=timezone.utc), ) ) return parsed_periods diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py index 59494bfa..a91cc148 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py @@ -1,6 +1,5 @@ """JSON parser for Stats Perform MA3 feeds.""" -import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import List from .base import OptaJSONParser, OptaEvent @@ -9,12 +8,12 @@ def _parse_ma3_datetime(dt_str: str) -> datetime: try: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) except ValueError: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py index 148b4d79..823f8313 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py @@ -1,6 +1,5 @@ """XML parser for Stats Perform MA3 feeds.""" -import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import List from .base import OptaXMLParser, OptaEvent @@ -9,11 +8,11 @@ def _parse_ma3_datetime(dt_str: str) -> datetime: try: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) except ValueError: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 8e2143aa..1ef620e4 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -1,12 +1,10 @@ import json import logging from dataclasses import replace -from datetime import timedelta, timezone +from datetime import datetime, timedelta, timezone from enum import Enum from typing import Dict, List, Optional -from dateutil.parser import parse - from kloppy.domain import ( BodyPart, BodyPartQualifier, @@ -709,7 +707,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: ) date = raw_events["match"].get("dateutc") if date: - date = parse(date).astimezone(timezone.utc) + date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").replace( + tzinfo=timezone.utc + ) game_week = raw_events["match"].get("gameweek") if game_week: game_week = str(game_week) diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index b5cc0306..f819a5af 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -1,15 +1,14 @@ +import json import logging -from datetime import timedelta, timezone -from dateutil.parser import parse import warnings -from typing import NamedTuple, IO, Optional, Union, Dict from collections import Counter -import numpy as np -import json +from datetime import datetime, timedelta, timezone from pathlib import Path +from typing import IO, Dict, NamedTuple, Optional, Union + +import numpy as np from kloppy.domain import ( - attacking_direction_from_frame, AttackingDirection, DatasetFlag, Frame, @@ -18,6 +17,7 @@ Orientation, Period, Player, + PlayerData, Point, Point3D, PositionType, @@ -25,7 +25,7 @@ Score, Team, TrackingDataset, - PlayerData, + attacking_direction_from_frame, ) from kloppy.infra.serializers.tracking.deserializer import ( TrackingDataDeserializer, @@ -367,7 +367,9 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: date = metadata.get("date_time") if date: - date = parse(date).astimezone(timezone.utc) + date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=timezone.utc + ) game_id = metadata.get("id") if game_id: diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index 7cc08516..1ed04e1a 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -2,8 +2,7 @@ import warnings from collections import defaultdict from typing import NamedTuple, Optional, Union, IO -from datetime import timedelta, timezone -from dateutil.parser import parse +from datetime import datetime, timedelta from lxml import objectify @@ -138,9 +137,9 @@ def deserialize( ] with performance_logging("parse raw data", logger=logger): - date = parse( + date = datetime.fromisoformat( match_root.MatchInformation.General.attrib["KickoffTime"] - ).astimezone(timezone.utc) + ) game_week = match_root.MatchInformation.General.attrib["MatchDay"] game_id = match_root.MatchInformation.General.attrib["MatchId"] diff --git a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py index 831370cb..001efdfa 100644 --- a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py +++ b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py @@ -1,9 +1,8 @@ import logging -from datetime import timedelta, timezone +from datetime import datetime, timedelta, timezone import warnings from typing import Dict, Optional, Union import html -from dateutil.parser import parse from lxml import objectify @@ -184,9 +183,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: pitch_size_height = float( match.attrib["fPitchYSizeMeters"].replace(",", ".") ) - date = parse(meta_data.match.attrib["dtDate"]).astimezone( - timezone.utc - ) + date = datetime.strptime( + meta_data.match.attrib["dtDate"], "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc) game_id = meta_data.match.attrib["iId"] for period in match.iterchildren(tag="period"): @@ -205,7 +204,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: ) ) elif hasattr(meta_data, "Phase1StartFrame"): - date = parse(str(meta_data["Kickoff"])) + date = datetime.strptime( + str(meta_data["Kickoff"]), "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc) game_id = str(meta_data["GameID"]) id_suffix = "ID" player_item = "item" diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index b38db5fa..f0ad8ba3 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -61,11 +61,11 @@ def test_parse_f24_datetime(): """Test if the F24 datetime is correctly parsed""" # timestamps have millisecond precision assert _parse_f24_datetime("2018-09-23T15:02:13.608") == datetime( - 2018, 9, 23, 15, 2, 13, 608000, tzinfo=timezone.utc + 2018, 9, 23, 14, 2, 13, 608000, tzinfo=timezone.utc ) # milliseconds are not left-padded assert _parse_f24_datetime("2018-09-23T15:02:14.39") == datetime( - 2018, 9, 23, 15, 2, 14, 39000, tzinfo=timezone.utc + 2018, 9, 23, 14, 2, 14, 39000, tzinfo=timezone.utc ) @@ -325,7 +325,7 @@ def test_correct_deserialization(self, dataset: EventDataset): ) def test_timestamp_goal(self, dataset: EventDataset): - """Check timestamp from qualifier in case of goal""" + """Check timestamp from qualifier 374 in case of goal""" goal = dataset.get_event_by_id("2318695229") assert goal.timestamp == ( _parse_f24_datetime("2018-09-23T16:07:48.525") # event timestamp diff --git a/setup.py b/setup.py index 6d78f9c5..a2ed9746 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,6 @@ def setup_package(): "requests>=2.0.0,<3", "pytz>=2020.1", 'typing_extensions;python_version<"3.11"', - "python-dateutil>=2.8.1,<3", "sortedcontainers>=2", ], extras_require={ From edee570a0ac66b9e2f9ac5bbde64ab1b3b4bc54c Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 17 Dec 2024 21:01:59 +0100 Subject: [PATCH 4/4] refactor: remove numpy dependency from SkillCorner (#375) --- .../infra/serializers/tracking/skillcorner.py | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index f819a5af..32e2d670 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -1,13 +1,11 @@ import json import logging import warnings -from collections import Counter +from collections import Counter, defaultdict from datetime import datetime, timedelta, timezone from pathlib import Path from typing import IO, Dict, NamedTuple, Optional, Union -import numpy as np - from kloppy.domain import ( AttackingDirection, DatasetFlag, @@ -207,22 +205,21 @@ def _get_skillcorner_attacking_directions(cls, frames, periods): x-coords might not reflect the attacking direction. """ attacking_directions = {} - frame_period_ids = np.array([_frame.period.id for _frame in frames]) - frame_attacking_directions = np.array( - [ - attacking_direction_from_frame(frame) - if len(frame.players_data) > 0 - else AttackingDirection.NOT_SET - for frame in frames - ] - ) + # Group attacking directions by period ID + period_direction_map = defaultdict(list) + for frame in frames: + if len(frame.players_data) > 0: + direction = attacking_direction_from_frame(frame) + else: + direction = AttackingDirection.NOT_SET + period_direction_map[frame.period.id].append(direction) + + # Determine the most common attacking direction for each period for period_id in periods.keys(): - if period_id in frame_period_ids: - count = Counter( - frame_attacking_directions[frame_period_ids == period_id] - ) - attacking_directions[period_id] = count.most_common()[0][0] + if period_id in period_direction_map: + count = Counter(period_direction_map[period_id]) + attacking_directions[period_id] = count.most_common(1)[0][0] else: attacking_directions[period_id] = AttackingDirection.NOT_SET @@ -252,28 +249,33 @@ def __get_periods(cls, tracking): """gets the Periods contained in the tracking data""" periods = {} - _periods = np.array([f["period"] for f in tracking]) - unique_periods = set(_periods) - unique_periods = [ - period for period in unique_periods if period is not None - ] + # Extract unique periods while filtering out None values + unique_periods = { + frame["period"] + for frame in tracking + if frame["period"] is not None + } for period in unique_periods: + # Filter frames that belong to the current period and have valid "time" _frames = [ frame for frame in tracking if frame["period"] == period and frame["time"] is not None ] - periods[period] = Period( - id=period, - start_timestamp=timedelta( - seconds=_frames[0]["frame"] / frame_rate - ), - end_timestamp=timedelta( - seconds=_frames[-1]["frame"] / frame_rate - ), - ) + # Ensure _frames is not empty before accessing the first and last elements + if _frames: + periods[period] = Period( + id=period, + start_timestamp=timedelta( + seconds=_frames[0]["frame"] / frame_rate + ), + end_timestamp=timedelta( + seconds=_frames[-1]["frame"] / frame_rate + ), + ) + return periods @classmethod