Skip to content

Commit

Permalink
fix: common bug in parsing of UTC datetimes
Browse files (browse the repository at this point in the history)
  • Loading branch information
probberechts committed Dec 14, 2024
1 parent dff0204 commit 10db6d1
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 40 deletions.
14 changes: 7 additions & 7 deletions kloppy/infra/serializers/event/datafactory/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import json
import logging
from datetime import timedelta, datetime, timezone
from dateutil.parser import parse, _parser
from dataclasses import replace
from typing import Dict, List, Tuple, Union, IO, NamedTuple
from datetime import datetime, timedelta, timezone
from typing import IO, Dict, List, NamedTuple, Tuple, Union

from kloppy.domain import (
AttackingDirection,
Expand Down Expand Up @@ -41,7 +40,6 @@
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
from kloppy.utils import Readable, performance_logging


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -435,7 +433,7 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
+ status_update["time"]
+ match["stadiumGMT"],
"%Y%m%d%H:%M:%S%z",
).astimezone(timezone.utc)
)
half = status_update["t"]["half"]
if status_update["type"] == DF_EVENT_TYPE_STATUS_MATCH_START:
half = 1
Expand All @@ -458,8 +456,10 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
date = match["date"]
if date:
# TODO: scheduledStart and stadiumGMT should probably be used here too
date = parse(date).astimezone(timezone.utc)
except _parser.ParserError:
date = datetime.strptime(date, "%Y%m%d").replace(
tzinfo=timezone.utc
)
except ValueError:
date = None
game_week = match.get("week", None)
if game_week:
Expand Down
7 changes: 3 additions & 4 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Dict, List, NamedTuple, IO
from datetime import timedelta, datetime, timezone
import logging
from dateutil.parser import parse
from lxml import objectify

from kloppy.domain import (
Expand Down Expand Up @@ -277,7 +276,7 @@ def _event_chain_from_xml_elm(event_elm):


def _parse_datetime(dt_str: str) -> datetime:
return parse(dt_str).astimezone(timezone.utc)
return datetime.fromisoformat(dt_str)


def _get_event_qualifiers(event_chain: Dict) -> List[Qualifier]:
Expand Down Expand Up @@ -432,9 +431,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
event_root = objectify.fromstring(inputs.event_data.read())

with performance_logging("parse data", logger=logger):
date = parse(
date = datetime.fromisoformat(
match_root.MatchInformation.General.attrib["KickoffTime"]
).astimezone(timezone.utc)
)
game_week = match_root.MatchInformation.General.attrib["MatchDay"]
game_id = match_root.MatchInformation.General.attrib["MatchId"]

Expand Down
24 changes: 18 additions & 6 deletions kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""XML parser for Opta F24 feeds."""
import pytz

from datetime import datetime, timezone
from typing import List, Optional
from dateutil.parser import parse

from .base import OptaXMLParser, OptaEvent
import pytz

from .base import OptaEvent, OptaXMLParser


def _parse_f24_datetime(dt_str: str) -> datetime:
Expand All @@ -15,8 +16,17 @@ def zero_pad_milliseconds(timestamp):
return ".".join(parts[:-1] + ["{:03d}".format(int(parts[-1]))])

dt_str = zero_pad_milliseconds(dt_str)
return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace(
tzinfo=pytz.utc
print(
dt_str,
datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f"),
datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace(
tzinfo=pytz.timezone("Europe/London")
),
)
return (
datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f")
.replace(tzinfo=pytz.timezone("Europe/London"))
.astimezone(timezone.utc)
)


Expand Down Expand Up @@ -58,7 +68,9 @@ def extract_date(self) -> Optional[str]:
"""Return the date of the game."""
game_elm = self.root.find("Game")
if game_elm and "game_date" in game_elm.attrib:
return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc)
return datetime.strptime(
game_elm.attrib["game_date"], "%Y-%m-%dT%H:%M:%S"
).replace(pytz.timezone("Europe/London"))
else:
return None

Expand Down
11 changes: 7 additions & 4 deletions kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""JSON parser for Stats Perform MA1 feeds."""
import pytz

from datetime import datetime, timezone
from typing import Any, Optional, List, Tuple, Dict
from typing import Any, Dict, List, Optional, Tuple

from kloppy.domain import Period, Score, Team, Ground, Player
import pytz

from kloppy.domain import Ground, Period, Player, Score, Team
from kloppy.exceptions import DeserializationError

from .base import OptaJSONParser


Expand Down Expand Up @@ -100,7 +103,7 @@ def extract_date(self) -> Optional[str]:
if "matchInfo" in self.root and "date" in self.root["matchInfo"]:
return datetime.strptime(
self.root["matchInfo"]["date"], "%Y-%m-%dZ"
).astimezone(timezone.utc)
).replace(tzinfo=timezone.utc)
else:
return None

Expand Down
8 changes: 4 additions & 4 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import json
import logging
from dataclasses import replace
from datetime import timedelta, timezone
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Dict, List, Optional

from dateutil.parser import parse

from kloppy.domain import (
BodyPart,
BodyPartQualifier,
Expand Down Expand Up @@ -709,7 +707,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
)
date = raw_events["match"].get("dateutc")
if date:
date = parse(date).astimezone(timezone.utc)
date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").replace(
tzinfo=timezone.utc
)
game_week = raw_events["match"].get("gameweek")
if game_week:
game_week = str(game_week)
Expand Down
18 changes: 10 additions & 8 deletions kloppy/infra/serializers/tracking/skillcorner.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import json
import logging
from datetime import timedelta, timezone
from dateutil.parser import parse
import warnings
from typing import NamedTuple, IO, Optional, Union, Dict
from collections import Counter
import numpy as np
import json
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import IO, Dict, NamedTuple, Optional, Union

import numpy as np

from kloppy.domain import (
attacking_direction_from_frame,
AttackingDirection,
DatasetFlag,
Frame,
Expand All @@ -18,14 +17,15 @@
Orientation,
Period,
Player,
PlayerData,
Point,
Point3D,
PositionType,
Provider,
Score,
Team,
TrackingDataset,
PlayerData,
attacking_direction_from_frame,
)
from kloppy.infra.serializers.tracking.deserializer import (
TrackingDataDeserializer,
Expand Down Expand Up @@ -367,7 +367,9 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset:

date = metadata.get("date_time")
if date:
date = parse(date).astimezone(timezone.utc)
date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").replace(
tzinfo=timezone.utc
)

game_id = metadata.get("id")
if game_id:
Expand Down
13 changes: 7 additions & 6 deletions kloppy/infra/serializers/tracking/tracab/tracab_dat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import logging
from datetime import timedelta, timezone
from datetime import datetime, timedelta, timezone
import warnings
from typing import Dict, Optional, Union
import html
from dateutil.parser import parse

from lxml import objectify

Expand Down Expand Up @@ -184,9 +183,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
pitch_size_height = float(
match.attrib["fPitchYSizeMeters"].replace(",", ".")
)
date = parse(meta_data.match.attrib["dtDate"]).astimezone(
timezone.utc
)
date = datetime.strptime(
meta_data.match.attrib["dtDate"], "%Y-%m-%d %H:%M:%S"
).replace(tzinfo=timezone.utc)
game_id = meta_data.match.attrib["iId"]

for period in match.iterchildren(tag="period"):
Expand All @@ -205,7 +204,9 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
)
)
elif hasattr(meta_data, "Phase1StartFrame"):
date = parse(str(meta_data["Kickoff"]))
date = datetime.strptime(
str(meta_data["Kickoff"]), "%Y-%m-%d %H:%M:%S"
).replace(tzinfo=timezone.utc)
game_id = str(meta_data["GameID"])
id_suffix = "ID"
player_item = "item"
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def setup_package():
"requests>=2.0.0,<3",
"pytz>=2020.1",
'typing_extensions;python_version<"3.11"',
"python-dateutil>=2.8.1,<3",
"sortedcontainers>=2",
],
extras_require={
Expand Down

0 comments on commit 10db6d1

Please sign in to comment.