Skip to content

Commit

Permalink
Merge branch 'master' into feature/statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesDeprest committed Dec 18, 2024
2 parents c2002ce + 452efe7 commit cb4893e
Show file tree
Hide file tree
Showing 24 changed files with 973 additions and 137 deletions.
2 changes: 1 addition & 1 deletion kloppy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# )
# from .domain.services.state_builder import add_state

__version__ = "3.15.0"
__version__ = "3.16.0"
43 changes: 42 additions & 1 deletion kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .position import PositionType

from ...utils import deprecated
from ...utils import deprecated, snake_case

if sys.version_info >= (3, 8):
from typing import Literal
Expand Down Expand Up @@ -119,6 +119,46 @@ def __str__(self):
return self.value


class OfficialType(Enum):
    """Enumeration for types of officials (referees).

    Each member's value is the human-readable label of the role, which is
    also what ``str(member)`` returns.
    """

    VideoAssistantReferee = "Video Assistant Referee"
    MainReferee = "Main Referee"
    AssistantReferee = "Assistant Referee"
    FourthOfficial = "Fourth Official"

    def __str__(self) -> str:
        # Render as the plain label instead of the default
        # "OfficialType.<member>" representation.
        return self.value


@dataclass(frozen=True)
class Official:
    """
    Represents an official (referee) with optional names and roles.

    Only ``official_id`` is required; every other field defaults to ``None``.
    The dataclass is frozen, so instances cannot be modified after creation.
    """

    official_id: str
    name: Optional[str] = None
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    role: Optional[OfficialType] = None

    @property
    def full_name(self) -> str:
        """
        Returns the full name of the official, falling back to role-based or
        ID-based naming.

        The first available option is used, in this order:

        1. ``name``
        2. ``"<first_name> <last_name>"`` when both are set
        3. ``last_name`` alone
        4. ``"<snake-cased role>_<official_id>"`` when a role is set
        5. ``"official_<official_id>"``
        """
        if self.name:
            return self.name
        if self.first_name and self.last_name:
            return f"{self.first_name} {self.last_name}"
        if self.last_name:
            return self.last_name
        if self.role:
            # str(role) yields the role label, which is then snake-cased
            # to build an identifier-like placeholder name.
            return f"{snake_case(str(self.role))}_{self.official_id}"
        return f"official_{self.official_id}"


@dataclass(frozen=True)
class Player:
"""
Expand Down Expand Up @@ -1085,6 +1125,7 @@ class Metadata:
game_id: Optional[str] = None
home_coach: Optional[str] = None
away_coach: Optional[str] = None
officials: Optional[List] = field(default_factory=list)
attributes: Optional[Dict] = field(default_factory=dict, compare=False)

def __post_init__(self):
Expand Down
14 changes: 7 additions & 7 deletions kloppy/infra/serializers/event/datafactory/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import json
import logging
from datetime import timedelta, datetime, timezone
from dateutil.parser import parse, _parser
from dataclasses import replace
from typing import Dict, List, Tuple, Union, IO, NamedTuple
from datetime import datetime, timedelta, timezone
from typing import IO, Dict, List, NamedTuple, Tuple, Union

from kloppy.domain import (
AttackingDirection,
Expand Down Expand Up @@ -41,7 +40,6 @@
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
from kloppy.utils import Readable, performance_logging


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -435,7 +433,7 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
+ status_update["time"]
+ match["stadiumGMT"],
"%Y%m%d%H:%M:%S%z",
).astimezone(timezone.utc)
)
half = status_update["t"]["half"]
if status_update["type"] == DF_EVENT_TYPE_STATUS_MATCH_START:
half = 1
Expand All @@ -458,8 +456,10 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
date = match["date"]
if date:
# TODO: scheduledStart and stadiumGMT should probably be used here too
date = parse(date).astimezone(timezone.utc)
except _parser.ParserError:
date = datetime.strptime(date, "%Y%m%d").replace(
tzinfo=timezone.utc
)
except ValueError:
date = None
game_week = match.get("week", None)
if game_week:
Expand Down
45 changes: 41 additions & 4 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Dict, List, NamedTuple, IO
from datetime import timedelta, datetime, timezone
import logging
from dateutil.parser import parse
from lxml import objectify

from kloppy.domain import (
Expand All @@ -29,6 +28,8 @@
CardType,
AttackingDirection,
PositionType,
Official,
OfficialType,
)
from kloppy.exceptions import DeserializationError
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
Expand All @@ -55,6 +56,14 @@
"LA": PositionType.LeftWing,
}

# Lookup from the "Role" attribute of a Sportec <Referee> element to the
# corresponding OfficialType.
referee_types_mapping: Dict[str, OfficialType] = {
    "referee": OfficialType.MainReferee,
    # Both assistants share one type (see "secondAssistant" below).
    "firstAssistant": OfficialType.AssistantReferee,
    "videoReferee": OfficialType.VideoAssistantReferee,
    "secondAssistant": OfficialType.AssistantReferee,
    "fourthOfficial": OfficialType.FourthOfficial,
}

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -102,6 +111,7 @@ class SportecMetadata(NamedTuple):
fps: int
home_coach: str
away_coach: str
officials: List[Official]


def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
Expand Down Expand Up @@ -213,6 +223,31 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
]
)

if hasattr(match_root, "MatchInformation") and hasattr(
match_root.MatchInformation, "Referees"
):
officials = []
referee_path = objectify.ObjectPath(
"PutDataRequest.MatchInformation.Referees"
)
referee_elms = referee_path.find(match_root).iterchildren(
tag="Referee"
)

for referee in referee_elms:
ref_attrib = referee.attrib
officials.append(
Official(
official_id=ref_attrib["PersonId"],
name=ref_attrib["Shortname"],
first_name=ref_attrib["FirstName"],
last_name=ref_attrib["LastName"],
role=referee_types_mapping[ref_attrib["Role"]],
)
)
else:
officials = []

return SportecMetadata(
score=score,
teams=teams,
Expand All @@ -222,6 +257,7 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
fps=SPORTEC_FPS,
home_coach=home_coach,
away_coach=away_coach,
officials=officials,
)


Expand Down Expand Up @@ -277,7 +313,7 @@ def _event_chain_from_xml_elm(event_elm):


def _parse_datetime(dt_str: str) -> datetime:
return parse(dt_str).astimezone(timezone.utc)
return datetime.fromisoformat(dt_str)


def _get_event_qualifiers(event_chain: Dict) -> List[Qualifier]:
Expand Down Expand Up @@ -432,9 +468,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
event_root = objectify.fromstring(inputs.event_data.read())

with performance_logging("parse data", logger=logger):
date = parse(
date = datetime.fromisoformat(
match_root.MatchInformation.General.attrib["KickoffTime"]
).astimezone(timezone.utc)
)
game_week = match_root.MatchInformation.General.attrib["MatchDay"]
game_id = match_root.MatchInformation.General.attrib["MatchId"]

Expand Down Expand Up @@ -673,6 +709,7 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
officials=sportec_metadata.officials,
)

return EventDataset(
Expand Down
20 changes: 15 additions & 5 deletions kloppy/infra/serializers/event/statsperform/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pytz
import math
from typing import Dict, List, NamedTuple, IO, Optional
import logging
from datetime import datetime, timedelta

import pytz

from kloppy.domain import (
EventDataset,
Team,
Expand Down Expand Up @@ -743,6 +744,8 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
f"Set end of period {period.id} to {raw_event.timestamp}"
)
period.end_timestamp = raw_event.timestamp
elif raw_event.type_id == EVENT_TYPE_PLAYER_ON:
continue
else:
if not period.start_timestamp:
# not started yet
Expand Down Expand Up @@ -812,11 +815,18 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
):
if raw_event.type_id == EVENT_TYPE_SHOT_GOAL:
if 374 in raw_event.qualifiers:
# Qualifier 374 specifies the actual time of the shot for all goal events
# It uses London timezone for both MA3 and F24 feeds
naive_datetime = datetime.strptime(
raw_event.qualifiers[374],
"%Y-%m-%d %H:%M:%S.%f",
)
timezone = pytz.timezone("Europe/London")
aware_datetime = timezone.localize(
naive_datetime
)
generic_event_kwargs["timestamp"] = (
datetime.strptime(
raw_event.qualifiers[374],
"%Y-%m-%d %H:%M:%S.%f",
).replace(tzinfo=pytz.utc)
aware_datetime.astimezone(pytz.utc)
- period.start_timestamp
)
shot_event_kwargs = _parse_shot(raw_event)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def extract_score(self) -> Optional[Score]:
"""Return the score of the game."""
return None

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game.

    This implementation has no date to offer and always returns ``None``.
    """
    return None

Expand Down
25 changes: 16 additions & 9 deletions kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""XML parser for Opta F24 feeds."""
import pytz
from datetime import datetime, timezone

from datetime import datetime
from typing import List, Optional
from dateutil.parser import parse

from .base import OptaXMLParser, OptaEvent
import pytz

from .base import OptaEvent, OptaXMLParser


def _parse_f24_datetime(dt_str: str) -> datetime:
Expand All @@ -15,9 +16,10 @@ def zero_pad_milliseconds(timestamp):
return ".".join(parts[:-1] + ["{:03d}".format(int(parts[-1]))])

dt_str = zero_pad_milliseconds(dt_str)
return datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f").replace(
tzinfo=pytz.utc
)
naive_datetime = datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f")
timezone = pytz.timezone("Europe/London")
aware_datetime = timezone.localize(naive_datetime)
return aware_datetime.astimezone(pytz.utc)


class F24XMLParser(OptaXMLParser):
Expand Down Expand Up @@ -54,11 +56,16 @@ def extract_events(self) -> List[OptaEvent]:
for event in game_elm.iterchildren("Event")
]

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game.

    Reads the ``game_date`` attribute of the ``Game`` element, interprets it
    as Europe/London wall-clock time and converts it to UTC.

    Returns:
        A timezone-aware UTC ``datetime``, or ``None`` when there is no
        ``Game`` element or it has no ``game_date`` attribute.

    Raises:
        ValueError: If ``game_date`` does not match ``%Y-%m-%dT%H:%M:%S``.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether it was found.
    if game_elm is None or "game_date" not in game_elm.attrib:
        return None

    naive_datetime = datetime.strptime(
        game_elm.attrib["game_date"], "%Y-%m-%dT%H:%M:%S"
    )
    # localize() picks the correct GMT/BST offset for that date;
    # the local name deliberately avoids shadowing datetime.timezone.
    london_tz = pytz.timezone("Europe/London")
    return london_tz.localize(naive_datetime).astimezone(pytz.utc)

Expand Down
15 changes: 8 additions & 7 deletions kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""JSON parser for Stats Perform MA1 feeds."""
import pytz

from datetime import datetime, timezone
from typing import Any, Optional, List, Tuple, Dict
from typing import Any, Dict, List, Optional, Tuple

from kloppy.domain import Period, Score, Team, Ground, Player
from kloppy.domain import Ground, Period, Player, Score, Team
from kloppy.exceptions import DeserializationError

from .base import OptaJSONParser


Expand All @@ -30,12 +31,12 @@ def extract_periods(self) -> List[Period]:
id=period["id"],
start_timestamp=datetime.strptime(
period_start_raw, "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=pytz.utc)
).replace(tzinfo=timezone.utc)
if period_start_raw
else None,
end_timestamp=datetime.strptime(
period_end_raw, "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=pytz.utc)
).replace(tzinfo=timezone.utc)
if period_end_raw
else None,
)
Expand Down Expand Up @@ -95,12 +96,12 @@ def extract_lineups(self) -> Tuple[Team, Team]:
raise DeserializationError("Lineup incomplete")
return home_team, away_team

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game.

    Reads ``matchInfo.date`` from the parsed feed, which is expected in the
    form ``YYYY-MM-DDZ``, and returns it as midnight UTC of that day.

    Returns:
        A timezone-aware UTC ``datetime``, or ``None`` when the feed has no
        ``matchInfo`` entry or that entry has no ``date``.

    Raises:
        ValueError: If the date string does not match ``%Y-%m-%dZ``.
    """
    if "matchInfo" not in self.root or "date" not in self.root["matchInfo"]:
        return None

    # strptime yields a naive value here (the "Z" is matched literally).
    # Attach UTC with replace(); astimezone() would first interpret the
    # naive value in the machine's local zone and could shift the day.
    return datetime.strptime(
        self.root["matchInfo"]["date"], "%Y-%m-%dZ"
    ).replace(tzinfo=timezone.utc)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""XML parser for Stats Perform MA1 feeds."""
import pytz
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Optional, List, Dict, Tuple

from kloppy.domain import Period, Score, Team, Ground, Player
Expand All @@ -22,10 +21,10 @@ def extract_periods(self) -> List[Period]:
id=int(period.get("id")),
start_timestamp=datetime.strptime(
period.get("start"), "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=pytz.utc),
).replace(tzinfo=timezone.utc),
end_timestamp=datetime.strptime(
period.get("end"), "%Y-%m-%dT%H:%M:%SZ"
).replace(tzinfo=pytz.utc),
).replace(tzinfo=timezone.utc),
)
)
return parsed_periods
Expand Down
Loading

0 comments on commit cb4893e

Please sign in to comment.