From 401edc1de0e6fa8c42dc7060b60e832097302546 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Sat, 14 Dec 2024 21:05:18 +0100 Subject: [PATCH] fix: common bug in parsing of UTC datetimes --- .../serializers/event/datafactory/deserializer.py | 9 +++++---- .../infra/serializers/event/sportec/deserializer.py | 7 +++---- .../event/statsperform/parsers/f24_xml.py | 7 ++++--- .../event/statsperform/parsers/ma1_json.py | 11 +++++++---- .../serializers/event/wyscout/deserializer_v3.py | 8 ++++---- kloppy/infra/serializers/tracking/skillcorner.py | 7 ++++--- .../infra/serializers/tracking/tracab/tracab_dat.py | 13 +++++++------ setup.py | 1 - 8 files changed, 34 insertions(+), 29 deletions(-) diff --git a/kloppy/infra/serializers/event/datafactory/deserializer.py b/kloppy/infra/serializers/event/datafactory/deserializer.py index cf3d11eb..0f2eb3ca 100644 --- a/kloppy/infra/serializers/event/datafactory/deserializer.py +++ b/kloppy/infra/serializers/event/datafactory/deserializer.py @@ -1,7 +1,6 @@ import json import logging from datetime import timedelta, datetime, timezone -from dateutil.parser import parse, _parser from dataclasses import replace from typing import Dict, List, Tuple, Union, IO, NamedTuple @@ -435,7 +434,7 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: + status_update["time"] + match["stadiumGMT"], "%Y%m%d%H:%M:%S%z", - ).astimezone(timezone.utc) + ) half = status_update["t"]["half"] if status_update["type"] == DF_EVENT_TYPE_STATUS_MATCH_START: half = 1 @@ -458,8 +457,10 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: date = match["date"] if date: # TODO: scheduledStart and stadiumGMT should probably be used here too - date = parse(date).astimezone(timezone.utc) - except _parser.ParserError: + date = datetime.strptime(date, "%Y%m%d").replace( + tzinfo=timezone.utc + ) + except ValueError: date = None game_week = match.get("week", None) if game_week: diff --git
a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 14895206..f36ac5cb 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -2,7 +2,6 @@ from typing import Dict, List, NamedTuple, IO from datetime import timedelta, datetime, timezone import logging -from dateutil.parser import parse from lxml import objectify from kloppy.domain import ( @@ -277,7 +276,7 @@ def _event_chain_from_xml_elm(event_elm): def _parse_datetime(dt_str: str) -> datetime: - return parse(dt_str).astimezone(timezone.utc) + return datetime.fromisoformat(dt_str) def _get_event_qualifiers(event_chain: Dict) -> List[Qualifier]: @@ -432,9 +431,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): - date = parse( + date = datetime.fromisoformat( match_root.MatchInformation.General.attrib["KickoffTime"] - ).astimezone(timezone.utc) + ) game_week = match_root.MatchInformation.General.attrib["MatchDay"] game_id = match_root.MatchInformation.General.attrib["MatchId"] diff --git a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py index f32dbd95..f2963bde 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py @@ -2,7 +2,6 @@ import pytz from datetime import datetime, timezone from typing import List, Optional -from dateutil.parser import parse from .base import OptaXMLParser, OptaEvent @@ -16,7 +15,7 @@ def zero_pad_milliseconds(timestamp): dt_str = zero_pad_milliseconds(dt_str) - return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace( - tzinfo=pytz.utc - ) + return pytz.timezone("Europe/London").localize( + datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f") + ) @@ -58,7 +57,9 @@ def extract_date(self) -> Optional[str]:
"""Return the date of the game.""" game_elm = self.root.find("Game") if game_elm and "game_date" in game_elm.attrib: - return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc) + return pytz.timezone("Europe/London").localize( + datetime.strptime(game_elm.attrib["game_date"], "%Y-%m-%dT%H:%M:%S") + ) else: return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py index c9aa3974..40d915af 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py @@ -1,10 +1,13 @@ """JSON parser for Stats Perform MA1 feeds.""" -import pytz + from datetime import datetime, timezone -from typing import Any, Optional, List, Tuple, Dict +from typing import Any, Dict, List, Optional, Tuple -from kloppy.domain import Period, Score, Team, Ground, Player +import pytz + +from kloppy.domain import Ground, Period, Player, Score, Team from kloppy.exceptions import DeserializationError + from .base import OptaJSONParser @@ -100,7 +103,7 @@ def extract_date(self) -> Optional[str]: if "matchInfo" in self.root and "date" in self.root["matchInfo"]: return datetime.strptime( self.root["matchInfo"]["date"], "%Y-%m-%dZ" - ).astimezone(timezone.utc) + ).replace(tzinfo=timezone.utc) else: return None diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 8e2143aa..1ef620e4 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -1,12 +1,10 @@ import json import logging from dataclasses import replace -from datetime import timedelta, timezone +from datetime import datetime, timedelta, timezone from enum import Enum from typing import Dict, List, Optional -from dateutil.parser import parse - from kloppy.domain import ( BodyPart, BodyPartQualifier, @@ -709,7
+707,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: ) date = raw_events["match"].get("dateutc") if date: - date = parse(date).astimezone(timezone.utc) + date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").replace( + tzinfo=timezone.utc + ) game_week = raw_events["match"].get("gameweek") if game_week: game_week = str(game_week) diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index b5cc0306..e1f4be75 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -1,6 +1,5 @@ import logging -from datetime import timedelta, timezone -from dateutil.parser import parse +from datetime import datetime, timedelta, timezone import warnings from typing import NamedTuple, IO, Optional, Union, Dict from collections import Counter @@ -367,7 +366,9 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: date = metadata.get("date_time") if date: - date = parse(date).astimezone(timezone.utc) + date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=timezone.utc + ) game_id = metadata.get("id") if game_id: diff --git a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py index 831370cb..bdff42b8 100644 --- a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py +++ b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py @@ -1,9 +1,8 @@ import logging -from datetime import timedelta, timezone +from datetime import datetime, timedelta, timezone import warnings from typing import Dict, Optional, Union import html -from dateutil.parser import parse from lxml import objectify @@ -184,9 +183,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: pitch_size_height = float( match.attrib["fPitchYSizeMeters"].replace(",", ".") ) - date = parse(meta_data.match.attrib["dtDate"]).astimezone( - timezone.utc - ) + date = datetime.strptime(
meta_data.match.attrib["dtDate"], "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc) game_id = meta_data.match.attrib["iId"] for period in match.iterchildren(tag="period"): @@ -205,7 +204,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: ) ) elif hasattr(meta_data, "Phase1StartFrame"): - date = parse(str(meta_data["Kickoff"])) + date = datetime.strptime( + str(meta_data["Kickoff"]), "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc) game_id = str(meta_data["GameID"]) id_suffix = "ID" player_item = "item" diff --git a/setup.py b/setup.py index 6d78f9c5..a2ed9746 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,6 @@ def setup_package(): "requests>=2.0.0,<3", "pytz>=2020.1", 'typing_extensions;python_version<"3.11"', - "python-dateutil>=2.8.1,<3", "sortedcontainers>=2", ], extras_require={