diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py
index 2aff6625..9df12de6 100644
--- a/kloppy/_providers/sportec.py
+++ b/kloppy/_providers/sportec.py
@@ -1,4 +1,6 @@
-from typing import Optional, List
+from typing import List, Optional
+
+from requests.exceptions import HTTPError
from kloppy.config import get_config
from kloppy.domain import EventDataset, EventFactory, TrackingDataset
@@ -10,7 +12,7 @@
SportecTrackingDataDeserializer,
SportecTrackingDataInputs,
)
-from kloppy.io import open_as_file, FileLike
+from kloppy.io import FileLike, open_as_file
from kloppy.utils import deprecated
@@ -82,3 +84,154 @@ def load(
return load_event(
event_data, meta_data, event_types, coordinates, event_factory
)
+
+
+def get_IDSSE_url(match_id: str, data_type: str) -> str:
+ """Return the download URL for the meta, event or tracking data of a match in the IDSSE dataset; raises ValueError for an unknown data_type or match_id."""
+ # match_id -> {data_type: file_id substituted into DATA_URL}
+ DATA_MAP = {
+ "J03WPY": {"meta": 48392497, "event": 48392542, "tracking": 48392572},
+ "J03WN1": {"meta": 48392491, "event": 48392527, "tracking": 48392512},
+ "J03WMX": {"meta": 48392485, "event": 48392524, "tracking": 48392539},
+ "J03WOH": {"meta": 48392515, "event": 48392500, "tracking": 48392578},
+ "J03WQQ": {"meta": 48392488, "event": 48392521, "tracking": 48392545},
+ "J03WOY": {"meta": 48392503, "event": 48392518, "tracking": 48392551},
+ "J03WR9": {"meta": 48392494, "event": 48392530, "tracking": 48392563},
+ }
+ # figshare download URL template; {file_id} is filled in from DATA_MAP
+ DATA_URL = "https://figshare.com/ndownloader/files/{file_id}?private_link=1f806cb3e755c6b54e05"
+
+ if data_type not in ["meta", "event", "tracking"]:
+ raise ValueError(
+ f"Data type should be one of ['meta', 'event', 'tracking'], but got {data_type}"
+ )
+ if match_id not in DATA_MAP:
+ raise ValueError(
+ f"This match_id is not available, please select from {list(DATA_MAP.keys())}"
+ )
+ return DATA_URL.format(file_id=str(DATA_MAP[match_id][data_type]))
+
+
+def load_open_event_data(
+ match_id: str = "J03WPY",
+ event_types: Optional[List[str]] = None,
+ coordinates: Optional[str] = None,
+ event_factory: Optional[EventFactory] = None,
+) -> EventDataset:
+ """
+ Load event data for a game from the IDSSE dataset.
+
+ The IDSSE dataset will be released with the publication of the *An integrated
+ dataset of synchronized spatiotemporal and event data in elite soccer*
+ paper [1]_ and is released under the Creative Commons Attribution 4.0
+ license.
+
+ Args:
+ match_id (str, optional):
+ Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
+ for available matches.
+ event_types: Forwarded unchanged to `load_event`.
+ coordinates: Forwarded unchanged to `load_event`.
+ event_factory: Forwarded unchanged to `load_event`.
+
+ Notes:
+ The dataset contains seven full matches of raw event and position data
+ for both teams and the ball from the German Men's Bundesliga season
+ 2022/23 first and second division. A detailed description of the
+ dataset as well as the collection process can be found in the
+ accompanying paper.
+
+ The following matches are available::
+
+ matches = {
+ 'J03WMX': 1. FC Köln vs. FC Bayern München,
+ 'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
+ 'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
+ 'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
+ 'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
+ 'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
+ 'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
+ }
+
+ References:
+ .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
+ dataset of synchronized spatiotemporal and event data in elite soccer."
+ In Submission.
+ """
+ try:
+ return load_event(
+ event_data=get_IDSSE_url(match_id, "event"),
+ meta_data=get_IDSSE_url(match_id, "meta"),
+ event_types=event_types,
+ coordinates=coordinates,
+ event_factory=event_factory,
+ )
+ except HTTPError as e:
+ raise HTTPError(
+ "Unable to retrieve data. The dataset archive location may have changed. "
+ "See https://github.com/PySport/kloppy/issues/369 for details."
+ ) from e
+
+
+def load_open_tracking_data(
+ match_id: str = "J03WPY",
+ sample_rate: Optional[float] = None,
+ limit: Optional[int] = None,
+ coordinates: Optional[str] = None,
+ only_alive: Optional[bool] = True,
+) -> TrackingDataset:
+ """
+ Load tracking data for a game from the IDSSE dataset.
+
+ The IDSSE dataset will be released with the publication of the *An integrated
+ dataset of synchronized spatiotemporal and event data in elite soccer*
+ paper [1]_ and is released under the Creative Commons Attribution 4.0
+ license.
+
+ Args:
+ match_id (str, optional):
+ Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
+ for available matches.
+ sample_rate: Forwarded unchanged to `load_tracking`.
+ limit: Forwarded unchanged to `load_tracking`.
+ coordinates: Forwarded unchanged to `load_tracking`.
+ only_alive: Forwarded unchanged to `load_tracking`. Defaults to `True`.
+
+ Notes:
+ The dataset contains seven full matches of raw event and position data
+ for both teams and the ball from the German Men's Bundesliga season
+ 2022/23 first and second division. A detailed description of the
+ dataset as well as the collection process can be found in the
+ accompanying paper.
+
+ The following matches are available::
+
+ matches = {
+ 'J03WMX': 1. FC Köln vs. FC Bayern München,
+ 'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
+ 'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
+ 'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
+ 'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
+ 'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
+ 'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
+ }
+
+ References:
+ .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
+ dataset of synchronized spatiotemporal and event data in elite soccer."
+ In Submission.
+ """
+ try:
+ return load_tracking(
+ raw_data=get_IDSSE_url(match_id, "tracking"),
+ meta_data=get_IDSSE_url(match_id, "meta"),
+ sample_rate=sample_rate,
+ limit=limit,
+ coordinates=coordinates,
+ only_alive=only_alive,
+ )
+ except HTTPError as e:
+ raise HTTPError(
+ "Unable to retrieve data. The dataset archive location may have changed. "
+ "See https://github.com/PySport/kloppy/issues/369 for details."
+ ) from e
diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py
index c1830d1b..b4880451 100644
--- a/kloppy/domain/models/common.py
+++ b/kloppy/domain/models/common.py
@@ -20,7 +20,7 @@
from .position import PositionType
-from ...utils import deprecated
+from ...utils import deprecated, snake_case
if sys.version_info >= (3, 8):
from typing import Literal
@@ -119,6 +119,46 @@ def __str__(self):
return self.value
+class OfficialType(Enum):
+ """Roles a match official can have; each member's value is the human-readable role name returned by str()."""
+
+ VideoAssistantReferee = "Video Assistant Referee"
+ MainReferee = "Main Referee"
+ AssistantReferee = "Assistant Referee"
+ FourthOfficial = "Fourth Official"
+
+ def __str__(self):
+ return self.value
+
+
+@dataclass(frozen=True)
+class Official:
+ """
+ Immutable record for a match official: a required id plus optional name parts and role.
+ """
+
+ official_id: str
+ name: Optional[str] = None
+ first_name: Optional[str] = None
+ last_name: Optional[str] = None
+ role: Optional[OfficialType] = None
+
+ @property
+ def full_name(self):
+ """
+ Return `name`, else "first last", else `last_name`, else "<snake_case role>_<id>", else "official_<id>".
+ """
+ if self.name:
+ return self.name
+ if self.first_name and self.last_name:
+ return f"{self.first_name} {self.last_name}"
+ if self.last_name:  # a first name without a last name is never used on its own
+ return self.last_name
+ if self.role:
+ return f"{snake_case(str(self.role))}_{self.official_id}"
+ return f"official_{self.official_id}"
+
+
@dataclass(frozen=True)
class Player:
"""
@@ -1016,6 +1056,7 @@ class Metadata:
game_id: Optional[str] = None
home_coach: Optional[str] = None
away_coach: Optional[str] = None
+ officials: Optional[List] = field(default_factory=list)
attributes: Optional[Dict] = field(default_factory=dict, compare=False)
def __post_init__(self):
diff --git a/kloppy/domain/models/position.py b/kloppy/domain/models/position.py
index c0daebfd..84d08280 100644
--- a/kloppy/domain/models/position.py
+++ b/kloppy/domain/models/position.py
@@ -13,6 +13,8 @@ class PositionType(Enum):
CenterBack = ("Center Back", "CB", "Defender")
LeftCenterBack = ("Left Center Back", "LCB", "CenterBack")
RightCenterBack = ("Right Center Back", "RCB", "CenterBack")
+ LeftWingBack = ("Left Wing Back", "LWB", "WingBack")
+ RightWingBack = ("Right Wing Back", "RWB", "WingBack")
Midfielder = ("Midfielder", "MID", None)
DefensiveMidfield = ("Defensive Midfield", "DM", "Midfielder")
diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py
index 14895206..57d105a4 100644
--- a/kloppy/infra/serializers/event/sportec/deserializer.py
+++ b/kloppy/infra/serializers/event/sportec/deserializer.py
@@ -29,6 +29,8 @@
CardType,
AttackingDirection,
PositionType,
+ Official,
+ OfficialType,
)
from kloppy.exceptions import DeserializationError
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
@@ -55,6 +57,14 @@
"LA": PositionType.LeftWing,
}
+referee_types_mapping: Dict[str, OfficialType] = {
+ "referee": OfficialType.MainReferee,
+ "firstAssistant": OfficialType.AssistantReferee,  # first and second assistant share one OfficialType
+ "videoReferee": OfficialType.VideoAssistantReferee,
+ "secondAssistant": OfficialType.AssistantReferee,
+ "fourthOfficial": OfficialType.FourthOfficial,
+}
+
logger = logging.getLogger(__name__)
@@ -102,6 +112,7 @@ class SportecMetadata(NamedTuple):
fps: int
home_coach: str
away_coach: str
+ officials: List[Official]
def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
@@ -213,6 +224,31 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
]
)
+ if hasattr(match_root, "MatchInformation") and hasattr(
+ match_root.MatchInformation, "Referees"
+ ):
+ officials = []
+ referee_path = objectify.ObjectPath(
+ "PutDataRequest.MatchInformation.Referees"
+ )
+ referee_elms = referee_path.find(match_root).iterchildren(
+ tag="Referee"
+ )
+
+ for referee in referee_elms:
+ ref_attrib = referee.attrib
+ officials.append(
+ Official(
+ official_id=ref_attrib["PersonId"],
+ name=ref_attrib["Shortname"],
+ first_name=ref_attrib["FirstName"],
+ last_name=ref_attrib["LastName"],
+ role=referee_types_mapping[ref_attrib["Role"]],
+ )
+ )
+ else:
+ officials = []
+
return SportecMetadata(
score=score,
teams=teams,
@@ -222,6 +258,7 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
fps=SPORTEC_FPS,
home_coach=home_coach,
away_coach=away_coach,
+ officials=officials,
)
@@ -673,6 +710,7 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
+ officials=sportec_metadata.officials,
)
return EventDataset(
diff --git a/kloppy/infra/serializers/event/statsperform/deserializer.py b/kloppy/infra/serializers/event/statsperform/deserializer.py
index cf6f7a35..5ec3dac0 100644
--- a/kloppy/infra/serializers/event/statsperform/deserializer.py
+++ b/kloppy/infra/serializers/event/statsperform/deserializer.py
@@ -239,7 +239,9 @@
}
-def _parse_pass(raw_event: OptaEvent) -> Dict:
+def _parse_pass(
+ raw_event: OptaEvent, next_event: OptaEvent, next_next_event: OptaEvent
+) -> Dict:
if raw_event.outcome:
result = PassResult.COMPLETE
else:
@@ -250,6 +252,21 @@ def _parse_pass(raw_event: OptaEvent) -> Dict:
qualifiers = pass_qualifiers + overall_qualifiers
+ # Set the end location of a deflected pass to the start location
+ # of the next action and the outcome to "success" if the deflected
+ # pass reached a teammate
+ if next_event is not None and next_next_event is not None:
+ if (
+ next_event.type_id == EVENT_TYPE_BALL_TOUCH
+ and next_event.outcome == 1
+ and next_next_event.contestant_id == raw_event.contestant_id
+ ):
+ result = PassResult.COMPLETE
+ receiver_coordinates = Point(
+ x=next_next_event.x,
+ y=next_next_event.y,
+ )
+
return dict(
result=result,
receiver_coordinates=receiver_coordinates,
@@ -673,11 +690,16 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
f"Unknown team_id {raw_event.contestant_id}"
)
- next_event_elm = (
+ next_event = (
raw_events[idx + 1]
if (idx + 1) < len(raw_events)
else None
)
+ next_next_event = (
+ raw_events[idx + 2]
+ if (idx + 2) < len(raw_events)
+ else None
+ )
period = next(
(
period
@@ -702,11 +724,22 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
f"Set end of period {period.id} to {raw_event.timestamp}"
)
period.end_timestamp = raw_event.timestamp
+ elif raw_event.type_id == EVENT_TYPE_PLAYER_ON:
+ continue
else:
if not period.start_timestamp:
# not started yet
continue
+ if raw_event.contestant_id == teams[0].team_id:
+ team = teams[0]
+ elif raw_event.contestant_id == teams[1].team_id:
+ team = teams[1]
+ else:
+ raise DeserializationError(
+ f"Unknown team_id {raw_event.contestant_id}"
+ )
+
player = None
if raw_event.player_id is not None:
player = team.get_player_by_id(raw_event.player_id)
@@ -734,7 +767,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
)
if raw_event.type_id == EVENT_TYPE_PASS:
- pass_event_kwargs = _parse_pass(raw_event)
+ pass_event_kwargs = _parse_pass(
+ raw_event, next_event, next_next_event
+ )
event = self.event_factory.build_pass(
**pass_event_kwargs,
**generic_event_kwargs,
@@ -796,7 +831,7 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
EVENT_TYPE_BLOCKED_PASS,
):
interception_event_kwargs = _parse_interception(
- raw_event, team, next_event_elm
+ raw_event, team, next_event
)
event = self.event_factory.build_interception(
**interception_event_kwargs,
@@ -859,7 +894,7 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
timedelta(0), generic_event_kwargs["timestamp"]
)
substitution_event_kwargs = _parse_substitution(
- next_event_elm, team
+ next_event, team
)
event = self.event_factory.build_substitution(
result=None,
diff --git a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py
index 08088dd1..8e2143aa 100644
--- a/kloppy/infra/serializers/event/wyscout/deserializer_v3.py
+++ b/kloppy/infra/serializers/event/wyscout/deserializer_v3.py
@@ -2,49 +2,42 @@
import logging
from dataclasses import replace
from datetime import timedelta, timezone
+from enum import Enum
+from typing import Dict, List, Optional
+
from dateutil.parser import parse
-from typing import Dict, List
from kloppy.domain import (
- BallOutEvent,
BodyPart,
BodyPartQualifier,
- CardEvent,
CardType,
+ CarryResult,
CounterAttackQualifier,
- Dimension,
- DuelType,
DuelQualifier,
DuelResult,
+ DuelType,
EventDataset,
- FoulCommittedEvent,
- GenericEvent,
- GoalkeeperQualifier,
+ FormationType,
GoalkeeperActionType,
+ GoalkeeperQualifier,
Ground,
InterceptionResult,
Metadata,
Orientation,
- PassEvent,
PassQualifier,
PassResult,
PassType,
Period,
- PitchDimensions,
Player,
Point,
+ PositionType,
Provider,
Qualifier,
- RecoveryEvent,
SetPieceQualifier,
SetPieceType,
- ShotEvent,
ShotResult,
- TakeOnEvent,
TakeOnResult,
Team,
- FormationType,
- CarryResult,
)
from kloppy.exceptions import DeserializationError
from kloppy.utils import performance_logging
@@ -52,7 +45,6 @@
from ..deserializer import EventDataDeserializer
from .deserializer_v2 import WyscoutInputs
-
logger = logging.getLogger(__name__)
@@ -81,39 +73,173 @@
"3-2-3-2": FormationType.THREE_TWO_THREE_TWO,
}
+position_types_mapping: Dict[str, PositionType] = {
+ "GK": PositionType.Goalkeeper,
+ "LB": PositionType.LeftBack,
+ "LWB": PositionType.LeftWingBack,
+ "LB5": PositionType.LeftBack,
+ "LCB": PositionType.LeftCenterBack,
+ "LCB3": PositionType.LeftCenterBack,
+ "CB": PositionType.CenterBack,
+ "RCB": PositionType.RightCenterBack,
+ "RCB3": PositionType.RightCenterBack,
+ "RB": PositionType.RightBack,
+ "RWB": PositionType.RightWingBack,
+ "RB5": PositionType.RightBack,
+ "LW": PositionType.LeftWing,
+ "LAMF": PositionType.LeftAttackingMidfield,
+ "LCMF3": PositionType.LeftCentralMidfield,
+ "LCMF": PositionType.LeftCentralMidfield,
+ "DMF": PositionType.DefensiveMidfield,
+ "LDMF": PositionType.LeftDefensiveMidfield,
+ "RDMF": PositionType.RightDefensiveMidfield,
+ "RCMF3": PositionType.RightCentralMidfield,
+ "RCMF": PositionType.RightCentralMidfield,
+ "RAMF": PositionType.RightAttackingMidfield,
+ "RW": PositionType.RightWing,
+ "AMF": PositionType.AttackingMidfield,
+ "LWF": PositionType.LeftForward,
+ "CF": PositionType.Striker,
+ "SS": PositionType.Striker,
+ "RWF": PositionType.RightForward,
+}
+
def _flip_point(point: Point) -> Point:
return Point(x=100 - point.x, y=100 - point.y)
+class ShotZoneResults(str, Enum):  # zone codes compared against raw_event["shot"]["goalZone"]
+ GOAL_BOTTOM_LEFT = "glb"
+ GOAL_BOTTOM_RIGHT = "grb"
+ GOAL_BOTTOM_CENTER = "gb"
+ GOAL_CENTER_LEFT = "gl"
+ GOAL_CENTER = "gc"
+ GOAL_CENTER_RIGHT = "gr"
+ GOAL_TOP_LEFT = "glt"
+ GOAL_TOP_RIGHT = "grt"
+ GOAL_TOP_CENTER = "gt"
+ OUT_BOTTOM_RIGHT = "obr"  # NOTE(review): the zone diagram in _create_shot_result_coordinates shows "orb" — confirm which code the feed uses
+ OUT_BOTTOM_LEFT = "olb"
+ OUT_RIGHT = "or"
+ OUT_LEFT = "ol"
+ OUT_LEFT_TOP = "olt"
+ OUT_TOP = "ot"
+ OUT_RIGHT_TOP = "ort"
+ BLOCKED = "bc"
+
+
def _parse_team(raw_events, wyId: str, ground: Ground) -> Team:
+ # Get the first formation description
+ first_period_formation_info = raw_events["formations"][wyId]["1H"]
+ first_formation_descr = next(iter(first_period_formation_info.values()))
+ formation_str, formation_info = next(iter(first_formation_descr.items()))
+
+ # Extract the formation and players' positions
+ starting_formation = formations.get(formation_str)
+ starting_players_positions = {
+ player_id: position_types_mapping.get(
+ player_info["position"].upper(), PositionType.Unknown
+ )
+ for player_descr in formation_info["players"]
+ for player_id, player_info in player_descr.items()
+ }
+
team = Team(
team_id=wyId,
name=raw_events["teams"][wyId]["team"]["officialName"],
ground=ground,
- starting_formation=formations[
- next(
- iter(
- raw_events["formations"][wyId]["1H"][
- next(iter(raw_events["formations"][wyId]["1H"]))
- ]
- )
- )
- ],
+ starting_formation=starting_formation,
)
- team.players = [
- Player(
- player_id=str(player["player"]["wyId"]),
- team=team,
- jersey_no=None,
- first_name=player["player"]["firstName"],
- last_name=player["player"]["lastName"],
+
+ for player in raw_events["players"][wyId]:
+ player_id = str(player["player"]["wyId"])
+ starting_position = starting_players_positions.get(player_id)
+ team.players.append(
+ Player(
+ player_id=player_id,
+ team=team,
+ jersey_no=None,
+ first_name=player["player"]["firstName"],
+ last_name=player["player"]["lastName"],
+ starting=starting_position is not None,
+ starting_position=starting_position,
+ )
)
- for player in raw_events["players"][wyId]
- ]
+
return team
+def _create_shot_result_coordinates(raw_event: Dict) -> Optional[Point]:
+ """Estimate the shot end location from the Wyscout tags.
+
+ Wyscout does not provide end-coordinates of shots. Instead shots on goal
+ are tagged with a zone. This function maps each of these zones to
+ a coordinate. The zones and corresponding y-coordinate are depicted below.
+
+ Returns None for an unrecognised zone; blocked shots reuse the shot's own location.
+ olt | ot | ort
+ --------------------------------
+ ||=================||
+ -------------------------------
+ || glt | gt | grt ||
+ --------------------------------
+ ol || gl | gc | gr || or
+ --------------------------------
+ olb || glb | gb | grb || orb
+
+ 40 45 50 55 60 (y-coordinate of zone)
+ 44.62 55.38 (y-coordinate of post)
+ """
+ if (
+ raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_BOTTOM_CENTER
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_CENTER
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_TOP_CENTER
+ ):
+ return Point(100.0, 50.0)
+
+ if (
+ raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_BOTTOM_RIGHT
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_CENTER_RIGHT
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_TOP_RIGHT
+ ):
+ return Point(100.0, 55.0)
+
+ if (
+ raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_BOTTOM_LEFT
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_CENTER_LEFT
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.GOAL_TOP_LEFT
+ ):
+ return Point(100.0, 45.0)
+
+ if raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_TOP:
+ return Point(100.0, 50.0)
+
+ if (
+ raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_RIGHT_TOP
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_RIGHT
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_BOTTOM_RIGHT
+ ):
+ return Point(100.0, 60.0)
+
+ if (
+ raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_LEFT_TOP
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_LEFT
+ or raw_event["shot"]["goalZone"] == ShotZoneResults.OUT_BOTTOM_LEFT
+ ):
+ return Point(100.0, 40.0)
+
+ # If the shot is blocked, the start location is the best possible estimate
+ # for the shot's end location
+ if raw_event["shot"]["goalZone"] == ShotZoneResults.BLOCKED:
+ return Point(
+ x=float(raw_event["location"]["x"]),
+ y=float(raw_event["location"]["y"]),
+ )
+
+ return None  # goalZone matched none of the known codes
+
+
def _generic_qualifiers(raw_event: Dict) -> List[Qualifier]:
qualifiers: List[Qualifier] = []
@@ -146,10 +272,7 @@ def _parse_shot(raw_event: Dict) -> Dict:
return {
"result": result,
- "result_coordinates": Point(
- x=float(0),
- y=float(0),
- ),
+ "result_coordinates": _create_shot_result_coordinates(raw_event),
"qualifiers": qualifiers,
}
@@ -632,9 +755,11 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
periods.append(
Period(
id=period_id,
- start_timestamp=timedelta(seconds=0)
- if len(periods) == 0
- else periods[-1].end_timestamp,
+ start_timestamp=(
+ timedelta(seconds=0)
+ if len(periods) == 0
+ else periods[-1].end_timestamp
+ ),
end_timestamp=None,
)
)
@@ -658,16 +783,20 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
generic_event_args = {
"event_id": raw_event["id"],
"raw_event": raw_event,
- "coordinates": Point(
- x=float(raw_event["location"]["x"]),
- y=float(raw_event["location"]["y"]),
- )
- if raw_event["location"]
- else None,
+ "coordinates": (
+ Point(
+ x=float(raw_event["location"]["x"]),
+ y=float(raw_event["location"]["y"]),
+ )
+ if raw_event["location"]
+ else None
+ ),
"team": team,
- "player": players[team_id][player_id]
- if player_id != INVALID_PLAYER
- else None,
+ "player": (
+ players[team_id][player_id]
+ if player_id != INVALID_PLAYER
+ else None
+ ),
"ball_owning_team": ball_owning_team,
"ball_state": None,
"period": periods[-1],
diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py
index 3f418375..7cc08516 100644
--- a/kloppy/infra/serializers/tracking/sportec/deserializer.py
+++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py
@@ -122,6 +122,7 @@ def deserialize(
with performance_logging("parse metadata", logger=logger):
sportec_metadata = sportec_metadata_from_xml_elm(match_root)
teams = home_team, away_team = sportec_metadata.teams
+
periods = sportec_metadata.periods
transformer = self.get_transformer(
pitch_length=sportec_metadata.x_max,
@@ -130,6 +131,12 @@ def deserialize(
home_coach = sportec_metadata.home_coach
away_coach = sportec_metadata.away_coach
+ official_ids = []
+ if sportec_metadata.officials:
+ official_ids = [
+ x.official_id for x in sportec_metadata.officials
+ ]
+
with performance_logging("parse raw data", logger=logger):
date = parse(
match_root.MatchInformation.General.attrib["KickoffTime"]
@@ -156,6 +163,7 @@ def _iter():
for i, (frame_id, frame_data) in enumerate(
sorted(raw_frames.items())
):
+
if "ball" not in frame_data:
# Frames without ball data are corrupt.
continue
@@ -193,6 +201,7 @@ def _iter():
)
for player_id, raw_player_data in frame_data.items()
if player_id != "ball"
+ and player_id not in official_ids
},
other_data={},
ball_coordinates=Point3D(
@@ -242,6 +251,7 @@ def _iter():
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
+ officials=sportec_metadata.officials,
)
return TrackingDataset(
diff --git a/kloppy/sportec.py b/kloppy/sportec.py
index 79595791..848416dd 100644
--- a/kloppy/sportec.py
+++ b/kloppy/sportec.py
@@ -1 +1,7 @@
-from ._providers.sportec import load, load_event, load_tracking
+from ._providers.sportec import (
+ load,
+ load_event,
+ load_tracking,
+ load_open_event_data,
+ load_open_tracking_data,
+)
diff --git a/kloppy/tests/files/opta_f24.xml b/kloppy/tests/files/opta_f24.xml
index 03731965..02622eb8 100644
--- a/kloppy/tests/files/opta_f24.xml
+++ b/kloppy/tests/files/opta_f24.xml
@@ -308,6 +308,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/kloppy/tests/files/sportec_positional_w_referee.xml b/kloppy/tests/files/sportec_positional_w_referee.xml
new file mode 100644
index 00000000..d9f12d8f
--- /dev/null
+++ b/kloppy/tests/files/sportec_positional_w_referee.xml
@@ -0,0 +1,671 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/kloppy/tests/issues/issue_60/test_issue_60.py b/kloppy/tests/issues/issue_60/test_issue_60.py
index 5687973e..4d14b972 100644
--- a/kloppy/tests/issues/issue_60/test_issue_60.py
+++ b/kloppy/tests/issues/issue_60/test_issue_60.py
@@ -16,7 +16,7 @@ def test_deleted_event_opta(self):
assert deleted_event_id not in df["event_id"].to_list()
# OPTA F24 file: Pass -> Deleted Event -> Tackle
- assert event_dataset.events[16].event_name == "pass"
+ assert event_dataset.events[15].event_name == "pass"
assert (
- event_dataset.events[17].event_name == "duel"
+ event_dataset.events[16].event_name == "duel"
) # Deleted Event is filter out
diff --git a/kloppy/tests/test_adapter.py b/kloppy/tests/test_adapter.py
index dc9e20ad..10409958 100644
--- a/kloppy/tests/test_adapter.py
+++ b/kloppy/tests/test_adapter.py
@@ -57,4 +57,4 @@ def read_to_stream(self, url: str, output: BinaryIO):
# Asserts borrowed from `test_opta.py`
assert dataset.metadata.provider == Provider.OPTA
assert dataset.dataset_type == DatasetType.EVENT
- assert len(dataset.events) == 37
+ assert len(dataset.events) == 39
diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py
index ded8100f..b38db5fa 100644
--- a/kloppy/tests/test_opta.py
+++ b/kloppy/tests/test_opta.py
@@ -34,6 +34,7 @@
SetPieceType,
ShotResult,
build_coordinate_system,
+ PassResult,
)
from kloppy import opta
from kloppy.infra.serializers.event.statsperform.deserializer import (
@@ -244,7 +245,7 @@ class TestOptaPassEvent:
def test_deserialize_all(self, dataset: EventDataset):
"""It should deserialize all pass events"""
events = dataset.find_all("pass")
- assert len(events) == 15
+ assert len(events) == 16
def test_receiver_coordinates(self, dataset: EventDataset):
"""Test if the receiver coordinates are correctly deserialized"""
@@ -274,6 +275,13 @@ def test_pass_qualifiers(self, dataset: EventDataset):
PassQualifier
)
+ def test_pass_qualifiers_for_deflected_pass(self, dataset: EventDataset):
+ """Test that a deflected pass is deserialized as complete, with the expected receiver coordinates"""
+ deflected_pass = dataset.get_event_by_id("2509132176")
+ assert deflected_pass.result == PassResult.COMPLETE
+ assert deflected_pass.receiver_coordinates.x == 3.3
+ assert deflected_pass.receiver_coordinates.y == 81.1
+
def test_ball_state(self, dataset: EventDataset):
"""Test if the ball state is correctly set"""
events = dataset.find_all("pass")
@@ -302,7 +310,7 @@ class TestOptaShotEvent:
def test_deserialize_all(self, dataset: EventDataset):
"""It should deserialize all shot events"""
events = dataset.find_all("shot")
- assert len(events) == 3
+ assert len(events) == 4
def test_correct_deserialization(self, dataset: EventDataset):
"""Test if the shot event is correctly deserialized"""
diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py
index 1c11bb78..ac8ad2de 100644
--- a/kloppy/tests/test_sportec.py
+++ b/kloppy/tests/test_sportec.py
@@ -16,6 +16,8 @@
BallState,
Point3D,
PositionType,
+ OfficialType,
+ Official,
)
from kloppy import sportec
@@ -119,6 +121,10 @@ class TestSportecTrackingData:
def raw_data(self, base_dir) -> str:
return base_dir / "files/sportec_positional.xml"
+ @pytest.fixture
+ def raw_data_referee(self, base_dir) -> str:
+ return base_dir / "files/sportec_positional_w_referee.xml"
+
@pytest.fixture
def meta_data(self, base_dir) -> str:
return base_dir / "files/sportec_meta.xml"
@@ -145,6 +151,7 @@ def test_load_metadata(self, raw_data: Path, meta_data: Path):
assert dataset.metadata.periods[1].end_timestamp == timedelta(
seconds=4000 + 2996.68
)
+ assert len(dataset.metadata.officials) == 4
def test_load_frames(self, raw_data: Path, meta_data: Path):
dataset = sportec.load_tracking(
@@ -238,3 +245,52 @@ def test_enriched_metadata(self, raw_data: Path, meta_data: Path):
if away_coach:
assert isinstance(away_coach, str)
assert away_coach == "M. Rose"
+
+ def test_referees(self, raw_data_referee: Path, meta_data: Path):
+ dataset = sportec.load_tracking(
+ raw_data=raw_data_referee,
+ meta_data=meta_data,
+ coordinates="sportec",
+ only_alive=True,
+ )
+ assert len(dataset.metadata.officials) == 4
+
+ assert (
+ Official(
+ official_id="42",
+ name="Pierluigi Collina",
+ role=OfficialType.MainReferee,
+ ).role.value
+ == "Main Referee"
+ )
+
+ assert (
+ Official(
+ official_id="42",
+ name="Pierluigi Collina",
+ role=OfficialType.MainReferee,
+ ).full_name
+ == "Pierluigi Collina"
+ )
+ assert (
+ Official(
+ official_id="42",
+ first_name="Pierluigi",
+ last_name="Collina",
+ role=OfficialType.MainReferee,
+ ).full_name
+ == "Pierluigi Collina"
+ )
+ assert (
+ Official(
+ official_id="42",
+ last_name="Collina",
+ role=OfficialType.MainReferee,
+ ).full_name
+ == "Collina"
+ )
+ assert (
+ Official(official_id="42", role=OfficialType.MainReferee).full_name
+ == "main_referee_42"
+ )
+ assert Official(official_id="42").full_name == "official_42"
diff --git a/kloppy/tests/test_statsperform.py b/kloppy/tests/test_statsperform.py
index f1c772d1..9e8e9070 100644
--- a/kloppy/tests/test_statsperform.py
+++ b/kloppy/tests/test_statsperform.py
@@ -177,7 +177,7 @@ def test_deserialize_all(self, event_dataset: EventDataset):
pitch_length=None,
pitch_width=None,
)
- assert len(event_dataset.records) == 1652
+ assert len(event_dataset.records) == 1643
substitution_events = event_dataset.find_all("substitution")
assert len(substitution_events) == 9
diff --git a/kloppy/tests/test_wyscout.py b/kloppy/tests/test_wyscout.py
index 725ae92f..d5cb8b2f 100644
--- a/kloppy/tests/test_wyscout.py
+++ b/kloppy/tests/test_wyscout.py
@@ -2,31 +2,33 @@
from pathlib import Path
import pytest
+
+from kloppy import wyscout
from kloppy.domain import (
BodyPart,
BodyPartQualifier,
- Point,
- EventDataset,
- SetPieceType,
- SetPieceQualifier,
+ CardQualifier,
+ CardType,
DatasetType,
DuelQualifier,
DuelType,
+ EventDataset,
EventType,
- GoalkeeperQualifier,
+ FormationType,
GoalkeeperActionType,
- CardQualifier,
- CardType,
+ GoalkeeperQualifier,
Orientation,
+ PassQualifier,
PassResult,
- FormationType,
- Time,
PassType,
- PassQualifier,
+ Point,
+ PositionType,
+ SetPieceQualifier,
+ SetPieceType,
+ ShotResult,
+ Time,
)
-from kloppy import wyscout
-
@pytest.fixture(scope="session")
def event_v2_data(base_dir: Path) -> Path:
@@ -203,6 +205,12 @@ def test_metadata(self, dataset: EventDataset):
== FormationType.FOUR_THREE_ONE_TWO
)
+ cr7 = dataset.metadata.teams[0].get_player_by_id("3322")
+
+ assert cr7.full_name == "Cristiano Ronaldo dos Santos Aveiro"
+ assert cr7.starting is True
+ assert cr7.positions.last() == PositionType.Striker
+
def test_enriched_metadata(self, dataset: EventDataset):
date = dataset.metadata.date
if date:
@@ -261,12 +269,25 @@ def test_shot_assist_event(self, dataset: EventDataset):
)
def test_shot_event(self, dataset: EventDataset):
- shot_event = dataset.get_event_by_id(1927028534)
- assert shot_event.event_type == EventType.SHOT
+ # a blocked free kick shot
+ blocked_shot_event = dataset.get_event_by_id(1927028534)
+ assert blocked_shot_event.event_type == EventType.SHOT
+ assert blocked_shot_event.result == ShotResult.BLOCKED
+ assert blocked_shot_event.result_coordinates == Point(x=77.0, y=21.0)
assert (
- shot_event.get_qualifier_value(SetPieceQualifier)
+ blocked_shot_event.get_qualifier_value(SetPieceQualifier)
== SetPieceType.FREE_KICK
)
+ # off target shot
+ off_target_shot = dataset.get_event_by_id(1927028562)
+ assert off_target_shot.event_type == EventType.SHOT
+ assert off_target_shot.result == ShotResult.OFF_TARGET
+ assert off_target_shot.result_coordinates is None
+ # on target shot
+ on_target_shot = dataset.get_event_by_id(1927028637)
+ assert on_target_shot.event_type == EventType.SHOT
+ assert on_target_shot.result == ShotResult.SAVED
+ assert on_target_shot.result_coordinates == Point(100.0, 45.0)
def test_foul_committed_event(self, dataset: EventDataset):
foul_committed_event = dataset.get_event_by_id(1927028873)
diff --git a/kloppy/utils.py b/kloppy/utils.py
index b0858398..68d36af2 100644
--- a/kloppy/utils.py
+++ b/kloppy/utils.py
@@ -169,3 +169,8 @@ def __get__(self, instance, owner):
stacklevel=2,
)
return self.value
+
+
+def snake_case(s: str) -> str:
+ """Strip *s*, replace each run of whitespace or hyphens with one underscore, and lowercase it (camelCase is not split)."""
+ return re.sub(r"[\s\-]+", "_", s.strip()).lower()