Skip to content

Commit

Permalink
Merge branch 'master' into feat/tracab_meta
Browse files Browse the repository at this point in the history
  • Loading branch information
probberechts committed Dec 17, 2024
2 parents 0fe591f + 45ab84c commit af54a2e
Show file tree
Hide file tree
Showing 17 changed files with 1,279 additions and 80 deletions.
157 changes: 155 additions & 2 deletions kloppy/_providers/sportec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Optional, List
from typing import List, Optional

from requests.exceptions import HTTPError

from kloppy.config import get_config
from kloppy.domain import EventDataset, EventFactory, TrackingDataset
Expand All @@ -10,7 +12,7 @@
SportecTrackingDataDeserializer,
SportecTrackingDataInputs,
)
from kloppy.io import open_as_file, FileLike
from kloppy.io import FileLike, open_as_file
from kloppy.utils import deprecated


Expand Down Expand Up @@ -82,3 +84,154 @@ def load(
return load_event(
event_data, meta_data, event_types, coordinates, event_factory
)


def get_IDSSE_url(match_id: str, data_type: str) -> str:
    """Return the URL for the meta, event or tracking data of a match in the IDSSE dataset.

    Args:
        match_id: Identifier of the match; must be one of the keys of the
            match table defined in this function.
        data_type: Which file of the match to address: ``"meta"``,
            ``"event"`` or ``"tracking"``.

    Returns:
        The figshare download URL with the file id of the requested file
        filled in.

    Raises:
        ValueError: If ``data_type`` is not one of the three supported kinds,
            or if ``match_id`` is not in the match table. The data type is
            validated first.
    """
    # match_id -> file_id
    DATA_MAP = {
        "J03WPY": {"meta": 48392497, "event": 48392542, "tracking": 48392572},
        "J03WN1": {"meta": 48392491, "event": 48392527, "tracking": 48392512},
        "J03WMX": {"meta": 48392485, "event": 48392524, "tracking": 48392539},
        "J03WOH": {"meta": 48392515, "event": 48392500, "tracking": 48392578},
        "J03WQQ": {"meta": 48392488, "event": 48392521, "tracking": 48392545},
        "J03WOY": {"meta": 48392503, "event": 48392518, "tracking": 48392551},
        "J03WR9": {"meta": 48392494, "event": 48392530, "tracking": 48392563},
    }
    # URL constant
    DATA_URL = "https://figshare.com/ndownloader/files/{file_id}?private_link=1f806cb3e755c6b54e05"

    if data_type not in ("meta", "event", "tracking"):
        raise ValueError(
            f"Data type should be one of ['meta', 'event', 'tracking'], but got {data_type}"
        )
    if match_id not in DATA_MAP:
        raise ValueError(
            f"This match_id is not available, please select from {list(DATA_MAP)}"
        )
    # str.format renders the integer file id itself; no explicit str() needed.
    return DATA_URL.format(file_id=DATA_MAP[match_id][data_type])


def load_open_event_data(
    match_id: str = "J03WPY",
    event_types: Optional[List[str]] = None,
    coordinates: Optional[str] = None,
    event_factory: Optional[EventFactory] = None,
) -> EventDataset:
    """
    Load event data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        event_types:
            Forwarded unchanged to `load_event`.
        coordinates:
            Forwarded unchanged to `load_event`.
        event_factory:
            Forwarded unchanged to `load_event`.

    Returns:
        The dataset returned by `load_event` for the event and meta data
        files of the requested match.

    Raises:
        ValueError: If `match_id` is not one of the available matches
            (raised by `get_IDSSE_url`).
        HTTPError: If the download fails with an HTTP error. The original
            error is chained as the cause and its `response` and `request`
            are carried over to the raised exception.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

            matches = {
                'J03WMX': 1. FC Köln vs. FC Bayern München,
                'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
                'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
                'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
                'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
                'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
                'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
            }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
               dataset of synchronized spatiotemporal and event data in elite soccer."
               In Submission.
    """
    try:
        return load_event(
            event_data=get_IDSSE_url(match_id, "event"),
            meta_data=get_IDSSE_url(match_id, "meta"),
            event_types=event_types,
            coordinates=coordinates,
            event_factory=event_factory,
        )
    except HTTPError as e:
        # Re-raise with a more helpful message, but keep the original
        # response/request attached so callers can still inspect e.g. the
        # status code on the exception they catch.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e


def load_open_tracking_data(
    match_id: str = "J03WPY",
    sample_rate: Optional[float] = None,
    limit: Optional[int] = None,
    coordinates: Optional[str] = None,
    only_alive: Optional[bool] = True,
) -> TrackingDataset:
    """
    Load tracking data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        sample_rate:
            Forwarded unchanged to `load_tracking`.
        limit:
            Forwarded unchanged to `load_tracking`.
        coordinates:
            Forwarded unchanged to `load_tracking`.
        only_alive:
            Forwarded unchanged to `load_tracking`. Defaults to `True`.

    Returns:
        The dataset returned by `load_tracking` for the tracking and meta
        data files of the requested match.

    Raises:
        ValueError: If `match_id` is not one of the available matches
            (raised by `get_IDSSE_url`).
        HTTPError: If the download fails with an HTTP error. The original
            error is chained as the cause and its `response` and `request`
            are carried over to the raised exception.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

            matches = {
                'J03WMX': 1. FC Köln vs. FC Bayern München,
                'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
                'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
                'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
                'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
                'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
                'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
            }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
               dataset of synchronized spatiotemporal and event data in elite soccer."
               In Submission.
    """
    try:
        return load_tracking(
            raw_data=get_IDSSE_url(match_id, "tracking"),
            meta_data=get_IDSSE_url(match_id, "meta"),
            sample_rate=sample_rate,
            limit=limit,
            coordinates=coordinates,
            only_alive=only_alive,
        )
    except HTTPError as e:
        # Re-raise with a more helpful message, but keep the original
        # response/request attached so callers can still inspect e.g. the
        # status code on the exception they catch.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e
43 changes: 42 additions & 1 deletion kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from .position import PositionType

from ...utils import deprecated
from ...utils import deprecated, snake_case

if sys.version_info >= (3, 8):
from typing import Literal
Expand Down Expand Up @@ -119,6 +119,46 @@ def __str__(self):
return self.value


class OfficialType(Enum):
    """Roles a match official (referee) can hold.

    The value of each member is the human-readable label of the role.
    """

    VideoAssistantReferee = "Video Assistant Referee"
    MainReferee = "Main Referee"
    AssistantReferee = "Assistant Referee"
    FourthOfficial = "Fourth Official"

    def __str__(self) -> str:
        # Render as the plain label instead of ``OfficialType.<Member>``.
        label = self.value
        return label


@dataclass(frozen=True)
class Official:
    """
    A match official (referee), identified by an id, with optional names and role.
    """

    # Only the id is required; every other field defaults to None.
    official_id: str
    name: Optional[str] = None
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    role: Optional[OfficialType] = None

    @property
    def full_name(self) -> str:
        """
        Best available display name for the official.

        Tried in order: `name`; "first_name last_name" when both are set;
        `last_name` alone; the snake-cased role followed by the id; and
        finally "official_<id>".
        """
        if self.name:
            return self.name

        if self.last_name:
            # A first name is only ever used together with a last name.
            if self.first_name:
                return f"{self.first_name} {self.last_name}"
            return self.last_name

        if self.role:
            role_label = str(self.role)
            return f"{snake_case(role_label)}_{self.official_id}"

        return f"official_{self.official_id}"


@dataclass(frozen=True)
class Player:
"""
Expand Down Expand Up @@ -1016,6 +1056,7 @@ class Metadata:
game_id: Optional[str] = None
home_coach: Optional[str] = None
away_coach: Optional[str] = None
officials: Optional[List] = field(default_factory=list)
attributes: Optional[Dict] = field(default_factory=dict, compare=False)

def __post_init__(self):
Expand Down
2 changes: 2 additions & 0 deletions kloppy/domain/models/position.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ class PositionType(Enum):
CenterBack = ("Center Back", "CB", "Defender")
LeftCenterBack = ("Left Center Back", "LCB", "CenterBack")
RightCenterBack = ("Right Center Back", "RCB", "CenterBack")
LeftWingBack = ("Left Wing Back", "LWB", "WingBack")
RightWingBack = ("Right Wing Back", "RWB", "WingBack")

Midfielder = ("Midfielder", "MID", None)
DefensiveMidfield = ("Defensive Midfield", "DM", "Midfielder")
Expand Down
38 changes: 38 additions & 0 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
CardType,
AttackingDirection,
PositionType,
Official,
OfficialType,
)
from kloppy.exceptions import DeserializationError
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
Expand All @@ -55,6 +57,14 @@
"LA": PositionType.LeftWing,
}

# Maps the `Role` attribute of a `Referee` XML element to the corresponding
# OfficialType. Both the first and the second assistant map to the same
# AssistantReferee type.
referee_types_mapping: Dict[str, OfficialType] = {
    "referee": OfficialType.MainReferee,
    "firstAssistant": OfficialType.AssistantReferee,
    "videoReferee": OfficialType.VideoAssistantReferee,
    "secondAssistant": OfficialType.AssistantReferee,
    "fourthOfficial": OfficialType.FourthOfficial,
}

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -102,6 +112,7 @@ class SportecMetadata(NamedTuple):
fps: int
home_coach: str
away_coach: str
officials: List[Official]


def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
Expand Down Expand Up @@ -213,6 +224,31 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
]
)

if hasattr(match_root, "MatchInformation") and hasattr(
match_root.MatchInformation, "Referees"
):
officials = []
referee_path = objectify.ObjectPath(
"PutDataRequest.MatchInformation.Referees"
)
referee_elms = referee_path.find(match_root).iterchildren(
tag="Referee"
)

for referee in referee_elms:
ref_attrib = referee.attrib
officials.append(
Official(
official_id=ref_attrib["PersonId"],
name=ref_attrib["Shortname"],
first_name=ref_attrib["FirstName"],
last_name=ref_attrib["LastName"],
role=referee_types_mapping[ref_attrib["Role"]],
)
)
else:
officials = []

return SportecMetadata(
score=score,
teams=teams,
Expand All @@ -222,6 +258,7 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
fps=SPORTEC_FPS,
home_coach=home_coach,
away_coach=away_coach,
officials=officials,
)


Expand Down Expand Up @@ -673,6 +710,7 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
officials=sportec_metadata.officials,
)

return EventDataset(
Expand Down
Loading

0 comments on commit af54a2e

Please sign in to comment.