diff --git a/kloppy/infra/serializers/event/datafactory/deserializer.py b/kloppy/infra/serializers/event/datafactory/deserializer.py index cf3d11eb..44f5df20 100644 --- a/kloppy/infra/serializers/event/datafactory/deserializer.py +++ b/kloppy/infra/serializers/event/datafactory/deserializer.py @@ -1,9 +1,8 @@ import json import logging -from datetime import timedelta, datetime, timezone -from dateutil.parser import parse, _parser from dataclasses import replace -from typing import Dict, List, Tuple, Union, IO, NamedTuple +from datetime import datetime, timedelta, timezone +from typing import IO, Dict, List, NamedTuple, Tuple, Union from kloppy.domain import ( AttackingDirection, @@ -41,7 +40,6 @@ from kloppy.infra.serializers.event.deserializer import EventDataDeserializer from kloppy.utils import Readable, performance_logging - logger = logging.getLogger(__name__) @@ -435,7 +433,7 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: + status_update["time"] + match["stadiumGMT"], "%Y%m%d%H:%M:%S%z", - ).astimezone(timezone.utc) + ) half = status_update["t"]["half"] if status_update["type"] == DF_EVENT_TYPE_STATUS_MATCH_START: half = 1 @@ -458,8 +456,10 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset: date = match["date"] if date: # TODO: scheduledStart and stadiumGMT should probably be used here too - date = parse(date).astimezone(timezone.utc) - except _parser.ParserError: + date = datetime.strptime(date, "%Y%m%d").replace( + tzinfo=timezone.utc + ) + except ValueError: date = None game_week = match.get("week", None) if game_week: diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 14895206..f36ac5cb 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -2,7 +2,6 @@ from typing import Dict, List, NamedTuple, IO from datetime import timedelta, datetime, timezone import logging -from dateutil.parser import parse from lxml import objectify from kloppy.domain import ( @@ -277,7 +276,7 @@ def _event_chain_from_xml_elm(event_elm): def _parse_datetime(dt_str: str) -> datetime: - return parse(dt_str).astimezone(timezone.utc) + return datetime.fromisoformat(dt_str) def _get_event_qualifiers(event_chain: Dict) -> List[Qualifier]: @@ -432,9 +431,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): - date = parse( + date = datetime.fromisoformat( match_root.MatchInformation.General.attrib["KickoffTime"] - ).astimezone(timezone.utc) + ) game_week = match_root.MatchInformation.General.attrib["MatchDay"] game_id = match_root.MatchInformation.General.attrib["MatchId"] diff --git a/kloppy/infra/serializers/event/statsperform/deserializer.py b/kloppy/infra/serializers/event/statsperform/deserializer.py index f603717a..22304f2d 100644 --- a/kloppy/infra/serializers/event/statsperform/deserializer.py +++ b/kloppy/infra/serializers/event/statsperform/deserializer.py @@ -1,9 +1,10 @@ -import pytz import math from typing import Dict, List, NamedTuple, IO, Optional import logging from datetime import datetime, timedelta +import pytz + from kloppy.domain import ( EventDataset, Team, @@ -793,11 +794,18 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset: ): if raw_event.type_id == EVENT_TYPE_SHOT_GOAL: if 374 in raw_event.qualifiers: + # Qualifier 374 specifies the actual time of the shot for all goal events + # It uses London timezone for both MA3 and F24 feeds + naive_datetime = datetime.strptime( + raw_event.qualifiers[374], + "%Y-%m-%d %H:%M:%S.%f", + ) + timezone = pytz.timezone("Europe/London") + aware_datetime = timezone.localize( + naive_datetime + ) generic_event_kwargs["timestamp"] = ( - datetime.strptime( - raw_event.qualifiers[374], - "%Y-%m-%d %H:%M:%S.%f", - ).replace(tzinfo=pytz.utc) + aware_datetime.astimezone(pytz.utc) - period.start_timestamp ) shot_event_kwargs = _parse_shot(raw_event) diff --git a/kloppy/infra/serializers/event/statsperform/parsers/base.py b/kloppy/infra/serializers/event/statsperform/parsers/base.py index 3fee98b9..2a7ca7cc 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/base.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/base.py @@ -61,7 +61,7 @@ def extract_score(self) -> Optional[Score]: """Return the score of the game.""" return None - def extract_date(self) -> Optional[str]: + def extract_date(self) -> Optional[datetime]: """Return the date of the game.""" return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py index f32dbd95..e8cb1ffb 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py @@ -1,10 +1,11 @@ """XML parser for Opta F24 feeds.""" -import pytz -from datetime import datetime, timezone + +from datetime import datetime from typing import List, Optional -from dateutil.parser import parse -from .base import OptaXMLParser, OptaEvent +import pytz + +from .base import OptaEvent, OptaXMLParser def _parse_f24_datetime(dt_str: str) -> datetime: @@ -15,9 +16,10 @@ def zero_pad_milliseconds(timestamp): return ".".join(parts[:-1] + ["{:03d}".format(int(parts[-1]))]) dt_str = zero_pad_milliseconds(dt_str) - return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace( - tzinfo=pytz.utc - ) + naive_datetime = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f") + timezone = pytz.timezone("Europe/London") + aware_datetime = timezone.localize(naive_datetime) + return aware_datetime.astimezone(pytz.utc) class F24XMLParser(OptaXMLParser): @@ -54,11 +56,16 @@ def extract_events(self) -> List[OptaEvent]: for event in game_elm.iterchildren("Event") ] - def extract_date(self) -> Optional[str]: + def extract_date(self) -> Optional[datetime]: """Return the date of the game.""" game_elm = self.root.find("Game") if game_elm and "game_date" in game_elm.attrib: - return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc) + naive_datetime = datetime.strptime( + game_elm.attrib["game_date"], "%Y-%m-%dT%H:%M:%S" + ) + timezone = pytz.timezone("Europe/London") + aware_datetime = timezone.localize(naive_datetime) + return aware_datetime.astimezone(pytz.utc) else: return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py index c9aa3974..8c1bf6e2 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py @@ -1,10 +1,11 @@ """JSON parser for Stats Perform MA1 feeds.""" -import pytz + from datetime import datetime, timezone -from typing import Any, Optional, List, Tuple, Dict +from typing import Any, Dict, List, Optional, Tuple -from kloppy.domain import Period, Score, Team, Ground, Player +from kloppy.domain import Ground, Period, Player, Score, Team from kloppy.exceptions import DeserializationError + from .base import OptaJSONParser @@ -30,12 +31,12 @@ def extract_periods(self) -> List[Period]: id=period["id"], start_timestamp=datetime.strptime( period_start_raw, "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc) + ).replace(tzinfo=timezone.utc) if period_start_raw else None, end_timestamp=datetime.strptime( period_end_raw, "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc) + ).replace(tzinfo=timezone.utc) if period_end_raw else None, ) @@ -95,12 +96,12 @@ def extract_lineups(self) -> Tuple[Team, Team]: raise DeserializationError("Lineup incomplete") return home_team, away_team - def extract_date(self) -> Optional[str]: + def extract_date(self) -> Optional[datetime]: """Return the date of the game.""" if "matchInfo" in self.root and "date" in self.root["matchInfo"]: return datetime.strptime( self.root["matchInfo"]["date"], "%Y-%m-%dZ" - ).astimezone(timezone.utc) + ).replace(tzinfo=timezone.utc) else: return None diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py index 5b7bda49..92058877 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py @@ -1,6 +1,5 @@ """XML parser for Stats Perform MA1 feeds.""" -import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Optional, List, Dict, Tuple from kloppy.domain import Period, Score, Team, Ground, Player @@ -22,10 +21,10 @@ def extract_periods(self) -> List[Period]: id=int(period.get("id")), start_timestamp=datetime.strptime( period.get("start"), "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc), + ).replace(tzinfo=timezone.utc), end_timestamp=datetime.strptime( period.get("end"), "%Y-%m-%dT%H:%M:%SZ" - ).replace(tzinfo=pytz.utc), + ).replace(tzinfo=timezone.utc), ) ) return parsed_periods diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py index 59494bfa..a91cc148 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma3_json.py @@ -1,6 +1,5 @@ """JSON parser for Stats Perform MA3 feeds.""" -import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import List from .base import OptaJSONParser, OptaEvent @@ -9,12 +8,12 @@ def _parse_ma3_datetime(dt_str: str) -> datetime: try: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) except ValueError: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py index 148b4d79..823f8313 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma3_xml.py @@ -1,6 +1,5 @@ """XML parser for Stats Perform MA3 feeds.""" -import pytz -from datetime import datetime +from datetime import datetime, timezone from typing import List from .base import OptaXMLParser, OptaEvent @@ -9,11 +8,11 @@ def _parse_ma3_datetime(dt_str: str) -> datetime: try: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) except ValueError: return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ").replace( - tzinfo=pytz.utc + tzinfo=timezone.utc ) diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py index 8e2143aa..1ef620e4 100644 --- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py +++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py @@ -1,12 +1,10 @@ import json import logging from dataclasses import replace -from datetime import timedelta, timezone +from datetime import datetime, timedelta, timezone from enum import Enum from typing import Dict, List, Optional -from dateutil.parser import parse - from kloppy.domain import ( BodyPart, BodyPartQualifier, @@ -709,7 +707,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset: ) date = raw_events["match"].get("dateutc") if date: - date = parse(date).astimezone(timezone.utc) + date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").replace( + tzinfo=timezone.utc + ) game_week = raw_events["match"].get("gameweek") if game_week: game_week = str(game_week) diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index b5cc0306..f819a5af 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -1,15 +1,14 @@ +import json import logging -from datetime import timedelta, timezone -from dateutil.parser import parse import warnings -from typing import NamedTuple, IO, Optional, Union, Dict from collections import Counter -import numpy as np -import json +from datetime import datetime, timedelta, timezone from pathlib import Path +from typing import IO, Dict, NamedTuple, Optional, Union + +import numpy as np from kloppy.domain import ( - attacking_direction_from_frame, AttackingDirection, DatasetFlag, Frame, @@ -18,6 +17,7 @@ Orientation, Period, Player, + PlayerData, Point, Point3D, PositionType, @@ -25,7 +25,7 @@ Score, Team, TrackingDataset, - PlayerData, + attacking_direction_from_frame, ) from kloppy.infra.serializers.tracking.deserializer import ( TrackingDataDeserializer, @@ -367,7 +367,9 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: date = metadata.get("date_time") if date: - date = parse(date).astimezone(timezone.utc) + date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace( + tzinfo=timezone.utc + ) game_id = metadata.get("id") if game_id: diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index 3f418375..8d886484 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -2,8 +2,7 @@ import warnings from collections import defaultdict from typing import NamedTuple, Optional, Union, IO -from datetime import timedelta, timezone -from dateutil.parser import parse +from datetime import datetime, timedelta from lxml import objectify @@ -131,9 +130,9 @@ def deserialize( away_coach = sportec_metadata.away_coach with performance_logging("parse raw data", logger=logger): - date = parse( + date = datetime.fromisoformat( match_root.MatchInformation.General.attrib["KickoffTime"] - ).astimezone(timezone.utc) + ) game_week = match_root.MatchInformation.General.attrib["MatchDay"] game_id = match_root.MatchInformation.General.attrib["MatchId"] diff --git a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py index 831370cb..001efdfa 100644 --- a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py +++ b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py @@ -1,9 +1,8 @@ import logging -from datetime import timedelta, timezone +from datetime import datetime, timedelta, timezone import warnings from typing import Dict, Optional, Union import html -from dateutil.parser import parse from lxml import objectify @@ -184,9 +183,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: pitch_size_height = float( match.attrib["fPitchYSizeMeters"].replace(",", ".") ) - date = parse(meta_data.match.attrib["dtDate"]).astimezone( - timezone.utc - ) + date = datetime.strptime( + meta_data.match.attrib["dtDate"], "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc) game_id = meta_data.match.attrib["iId"] for period in match.iterchildren(tag="period"): @@ -205,7 +204,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: ) ) elif hasattr(meta_data, "Phase1StartFrame"): - date = parse(str(meta_data["Kickoff"])) + date = datetime.strptime( + str(meta_data["Kickoff"]), "%Y-%m-%d %H:%M:%S" + ).replace(tzinfo=timezone.utc) game_id = str(meta_data["GameID"]) id_suffix = "ID" player_item = "item" diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index b38db5fa..f0ad8ba3 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -61,11 +61,11 @@ def test_parse_f24_datetime(): """Test if the F24 datetime is correctly parsed""" # timestamps have millisecond precision assert _parse_f24_datetime("2018-09-23T15:02:13.608") == datetime( - 2018, 9, 23, 15, 2, 13, 608000, tzinfo=timezone.utc + 2018, 9, 23, 14, 2, 13, 608000, tzinfo=timezone.utc ) # milliseconds are not left-padded assert _parse_f24_datetime("2018-09-23T15:02:14.39") == datetime( - 2018, 9, 23, 15, 2, 14, 39000, tzinfo=timezone.utc + 2018, 9, 23, 14, 2, 14, 39000, tzinfo=timezone.utc ) @@ -325,7 +325,7 @@ def test_correct_deserialization(self, dataset: EventDataset): ) def test_timestamp_goal(self, dataset: EventDataset): - """Check timestamp from qualifier in case of goal""" + """Check timestamp from qualifier 374 in case of goal""" goal = dataset.get_event_by_id("2318695229") assert goal.timestamp == ( _parse_f24_datetime("2018-09-23T16:07:48.525") # event timestamp diff --git a/setup.py b/setup.py index 6d78f9c5..a2ed9746 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,6 @@ def setup_package(): "requests>=2.0.0,<3", "pytz>=2020.1", 'typing_extensions;python_version<"3.11"', - "python-dateutil>=2.8.1,<3", "sortedcontainers>=2", ], extras_require={