Skip to content

Commit

Permalink
fix: common bug in parsing of UTC datetimes
Browse files Browse the repository at this point in the history
  • Loading branch information
probberechts committed Dec 14, 2024
1 parent dff0204 commit 7ab2204
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 34 deletions.
9 changes: 5 additions & 4 deletions kloppy/infra/serializers/event/datafactory/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import logging
from datetime import timedelta, datetime, timezone
from dateutil.parser import parse, _parser
from dataclasses import replace
from typing import Dict, List, Tuple, Union, IO, NamedTuple

Expand Down Expand Up @@ -435,7 +434,7 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
+ status_update["time"]
+ match["stadiumGMT"],
"%Y%m%d%H:%M:%S%z",
).astimezone(timezone.utc)
)
half = status_update["t"]["half"]
if status_update["type"] == DF_EVENT_TYPE_STATUS_MATCH_START:
half = 1
Expand All @@ -458,8 +457,10 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
date = match["date"]
if date:
# TODO: scheduledStart and stadiumGMT should probably be used here too
date = parse(date).astimezone(timezone.utc)
except _parser.ParserError:
date = datetime.strptime(date, "%Y%m%d").replace(
tzinfo=timezone.utc
)
except ValueError:
date = None
game_week = match.get("week", None)
if game_week:
Expand Down
7 changes: 3 additions & 4 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Dict, List, NamedTuple, IO
from datetime import timedelta, datetime, timezone
import logging
from dateutil.parser import parse
from lxml import objectify

from kloppy.domain import (
Expand Down Expand Up @@ -277,7 +276,7 @@ def _event_chain_from_xml_elm(event_elm):


def _parse_datetime(dt_str: str) -> datetime:
return parse(dt_str).astimezone(timezone.utc)
return datetime.fromisoformat(dt_str)


def _get_event_qualifiers(event_chain: Dict) -> List[Qualifier]:
Expand Down Expand Up @@ -432,9 +431,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
event_root = objectify.fromstring(inputs.event_data.read())

with performance_logging("parse data", logger=logger):
date = parse(
date = datetime.fromisoformat(
match_root.MatchInformation.General.attrib["KickoffTime"]
).astimezone(timezone.utc)
)
game_week = match_root.MatchInformation.General.attrib["MatchDay"]
game_id = match_root.MatchInformation.General.attrib["MatchId"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytz
from datetime import datetime, timezone
from typing import List, Optional
from dateutil.parser import parse

from .base import OptaXMLParser, OptaEvent

Expand All @@ -16,7 +15,7 @@ def zero_pad_milliseconds(timestamp):

dt_str = zero_pad_milliseconds(dt_str)
return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace(
tzinfo=pytz.utc
tzinfo=pytz.timezone("Europe/London")
)


Expand Down Expand Up @@ -58,7 +57,9 @@ def extract_date(self) -> Optional[str]:
"""Return the date of the game."""
game_elm = self.root.find("Game")
if game_elm and "game_date" in game_elm.attrib:
return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc)
return pytz.timezone("Europe/London").localize(
datetime.strptime(game_elm.attrib["game_date"], "%Y-%m-%dT%H:%M:%S")
)
else:
return None

Expand Down
11 changes: 7 additions & 4 deletions kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""JSON parser for Stats Perform MA1 feeds."""
import pytz

from datetime import datetime, timezone
from typing import Any, Optional, List, Tuple, Dict
from typing import Any, Dict, List, Optional, Tuple

from kloppy.domain import Period, Score, Team, Ground, Player
import pytz

from kloppy.domain import Ground, Period, Player, Score, Team
from kloppy.exceptions import DeserializationError

from .base import OptaJSONParser


Expand Down Expand Up @@ -100,7 +103,7 @@ def extract_date(self) -> Optional[str]:
if "matchInfo" in self.root and "date" in self.root["matchInfo"]:
return datetime.strptime(
self.root["matchInfo"]["date"], "%Y-%m-%dZ"
).astimezone(timezone.utc)
).replace(tzinfo=timezone.utc)
else:
return None

Expand Down
8 changes: 4 additions & 4 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import json
import logging
from dataclasses import replace
from datetime import timedelta, timezone
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Dict, List, Optional

from dateutil.parser import parse

from kloppy.domain import (
BodyPart,
BodyPartQualifier,
Expand Down Expand Up @@ -709,7 +707,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
)
date = raw_events["match"].get("dateutc")
if date:
date = parse(date).astimezone(timezone.utc)
date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").replace(
tzinfo=timezone.utc
)
game_week = raw_events["match"].get("gameweek")
if game_week:
game_week = str(game_week)
Expand Down
18 changes: 10 additions & 8 deletions kloppy/infra/serializers/tracking/skillcorner.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import json
import logging
from datetime import timedelta, timezone
from dateutil.parser import parse
import warnings
from typing import NamedTuple, IO, Optional, Union, Dict
from collections import Counter
import numpy as np
import json
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import IO, Dict, NamedTuple, Optional, Union

import numpy as np

from kloppy.domain import (
attacking_direction_from_frame,
AttackingDirection,
DatasetFlag,
Frame,
Expand All @@ -18,14 +17,15 @@
Orientation,
Period,
Player,
PlayerData,
Point,
Point3D,
PositionType,
Provider,
Score,
Team,
TrackingDataset,
PlayerData,
attacking_direction_from_frame,
)
from kloppy.infra.serializers.tracking.deserializer import (
TrackingDataDeserializer,
Expand Down Expand Up @@ -367,7 +367,9 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset:

date = metadata.get("date_time")
if date:
date = parse(date).astimezone(timezone.utc)
date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(
tzinfo=timezone.utc
)

game_id = metadata.get("id")
if game_id:
Expand Down
13 changes: 7 additions & 6 deletions kloppy/infra/serializers/tracking/tracab/tracab_dat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import logging
from datetime import timedelta, timezone
from datetime import datetime, timedelta, timezone
import warnings
from typing import Dict, Optional, Union
import html
from dateutil.parser import parse

from lxml import objectify

Expand Down Expand Up @@ -184,9 +183,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
pitch_size_height = float(
match.attrib["fPitchYSizeMeters"].replace(",", ".")
)
date = parse(meta_data.match.attrib["dtDate"]).astimezone(
timezone.utc
)
date = datetime.strptime(
meta_data.match.attrib["dtDate"], "%Y-%m-%d %H:%M:%S"
).replace(tzinfo=timezone.utc)
game_id = meta_data.match.attrib["iId"]

for period in match.iterchildren(tag="period"):
Expand All @@ -205,7 +204,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
)
)
elif hasattr(meta_data, "Phase1StartFrame"):
date = parse(str(meta_data["Kickoff"]))
date = datetime.strptime(
str(meta_data["Kickoff"]), "%Y-%m-%d %H:%M:%S"
).replace(tzinfo=timezone.utc)
game_id = str(meta_data["GameID"])
id_suffix = "ID"
player_item = "item"
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def setup_package():
"requests>=2.0.0,<3",
"pytz>=2020.1",
'typing_extensions;python_version<"3.11"',
"python-dateutil>=2.8.1,<3",
"sortedcontainers>=2",
],
extras_require={
Expand Down

0 comments on commit 7ab2204

Please sign in to comment.