Skip to content

Commit

Permalink
Merge branch 'master' into feature/statistics
Browse files Browse the repository at this point in the history
# Conflicts:
#	kloppy/infra/serializers/event/wyscout/deserializer_v3.py
  • Loading branch information
DriesDeprest committed Dec 16, 2024
2 parents 9b6f9af + dff0204 commit c2002ce
Show file tree
Hide file tree
Showing 9 changed files with 452 additions and 74 deletions.
157 changes: 155 additions & 2 deletions kloppy/_providers/sportec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Optional, List
from typing import List, Optional

from requests.exceptions import HTTPError

from kloppy.config import get_config
from kloppy.domain import EventDataset, EventFactory, TrackingDataset
Expand All @@ -10,7 +12,7 @@
SportecTrackingDataDeserializer,
SportecTrackingDataInputs,
)
from kloppy.io import open_as_file, FileLike
from kloppy.io import FileLike, open_as_file
from kloppy.utils import deprecated


Expand Down Expand Up @@ -82,3 +84,154 @@ def load(
return load_event(
event_data, meta_data, event_types, coordinates, event_factory
)


def get_IDSSE_url(match_id: str, data_type: str) -> str:
    """Return the URL of the meta, event or tracking data of an IDSSE match.

    Args:
        match_id: Identifier of the match. Must be one of the keys of the
            match-to-file mapping defined in this function (e.g. ``"J03WPY"``).
        data_type: Which file of the match to address: ``"meta"``,
            ``"event"`` or ``"tracking"``.

    Returns:
        The figshare download URL of the requested file.

    Raises:
        ValueError: If ``data_type`` is not one of the supported values, or
            if ``match_id`` is not part of the mapping. ``data_type`` is
            validated first.
    """
    # match_id -> file_id
    DATA_MAP = {
        "J03WPY": {"meta": 48392497, "event": 48392542, "tracking": 48392572},
        "J03WN1": {"meta": 48392491, "event": 48392527, "tracking": 48392512},
        "J03WMX": {"meta": 48392485, "event": 48392524, "tracking": 48392539},
        "J03WOH": {"meta": 48392515, "event": 48392500, "tracking": 48392578},
        "J03WQQ": {"meta": 48392488, "event": 48392521, "tracking": 48392545},
        "J03WOY": {"meta": 48392503, "event": 48392518, "tracking": 48392551},
        "J03WR9": {"meta": 48392494, "event": 48392530, "tracking": 48392563},
    }
    # URL constant
    DATA_URL = "https://figshare.com/ndownloader/files/{file_id}?private_link=1f806cb3e755c6b54e05"

    if data_type not in ("meta", "event", "tracking"):
        raise ValueError(
            f"Data type should be one of ['meta', 'event', 'tracking'], but got {data_type}"
        )
    if match_id not in DATA_MAP:
        raise ValueError(
            f"This match_id is not available, please select from {list(DATA_MAP)}"
        )
    # str.format already stringifies the integer file id.
    return DATA_URL.format(file_id=DATA_MAP[match_id][data_type])


def load_open_event_data(
    match_id: str = "J03WPY",
    event_types: Optional[List[str]] = None,
    coordinates: Optional[str] = None,
    event_factory: Optional[EventFactory] = None,
) -> EventDataset:
    """
    Load event data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        event_types:
            Forwarded unchanged to `load_event`.
        coordinates:
            Forwarded unchanged to `load_event`.
        event_factory:
            Forwarded unchanged to `load_event`.

    Returns:
        The dataset returned by `load_event` for the event and meta data
        URLs of the requested match.

    Raises:
        ValueError: If `match_id` is not one of the available matches
            (raised by `get_IDSSE_url`).
        HTTPError: If loading fails with an `HTTPError`. The error is
            re-raised with a hint about the archive location; the original
            error is chained as `__cause__` and its `response` / `request`
            are carried over.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

            matches = {
                'J03WMX': 1. FC Köln vs. FC Bayern München,
                'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
                'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
                'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
                'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
                'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
                'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
            }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
           dataset of synchronized spatiotemporal and event data in elite soccer."
           In Submission.
    """
    try:
        return load_event(
            event_data=get_IDSSE_url(match_id, "event"),
            meta_data=get_IDSSE_url(match_id, "meta"),
            event_types=event_types,
            coordinates=coordinates,
            event_factory=event_factory,
        )
    except HTTPError as e:
        # Keep the original response/request on the new exception so callers
        # can still inspect e.g. the status code without walking __cause__.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e


def load_open_tracking_data(
    match_id: str = "J03WPY",
    sample_rate: Optional[float] = None,
    limit: Optional[int] = None,
    coordinates: Optional[str] = None,
    only_alive: Optional[bool] = True,
) -> TrackingDataset:
    """
    Load tracking data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        sample_rate:
            Forwarded unchanged to `load_tracking`.
        limit:
            Forwarded unchanged to `load_tracking`.
        coordinates:
            Forwarded unchanged to `load_tracking`.
        only_alive:
            Forwarded unchanged to `load_tracking`. Defaults to `True`.

    Returns:
        The dataset returned by `load_tracking` for the tracking and meta
        data URLs of the requested match.

    Raises:
        ValueError: If `match_id` is not one of the available matches
            (raised by `get_IDSSE_url`).
        HTTPError: If loading fails with an `HTTPError`. The error is
            re-raised with a hint about the archive location; the original
            error is chained as `__cause__` and its `response` / `request`
            are carried over.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

            matches = {
                'J03WMX': 1. FC Köln vs. FC Bayern München,
                'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
                'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
                'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
                'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
                'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
                'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
            }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
           dataset of synchronized spatiotemporal and event data in elite soccer."
           In Submission.
    """
    try:
        return load_tracking(
            raw_data=get_IDSSE_url(match_id, "tracking"),
            meta_data=get_IDSSE_url(match_id, "meta"),
            sample_rate=sample_rate,
            limit=limit,
            coordinates=coordinates,
            only_alive=only_alive,
        )
    except HTTPError as e:
        # Keep the original response/request on the new exception so callers
        # can still inspect e.g. the status code without walking __cause__.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e
2 changes: 2 additions & 0 deletions kloppy/domain/models/position.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ class PositionType(Enum):
CenterBack = ("Center Back", "CB", "Defender")
LeftCenterBack = ("Left Center Back", "LCB", "CenterBack")
RightCenterBack = ("Right Center Back", "RCB", "CenterBack")
LeftWingBack = ("Left Wing Back", "LWB", "WingBack")
RightWingBack = ("Right Wing Back", "RWB", "WingBack")

Midfielder = ("Midfielder", "MID", None)
DefensiveMidfield = ("Defensive Midfield", "DM", "Midfielder")
Expand Down
43 changes: 38 additions & 5 deletions kloppy/infra/serializers/event/statsperform/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@
}


def _parse_pass(raw_event: OptaEvent) -> Dict:
def _parse_pass(
raw_event: OptaEvent, next_event: OptaEvent, next_next_event: OptaEvent
) -> Dict:
if raw_event.outcome:
result = PassResult.COMPLETE
else:
Expand All @@ -255,6 +257,21 @@ def _parse_pass(raw_event: OptaEvent) -> Dict:

qualifiers = pass_qualifiers + overall_qualifiers

# Set the end location of a deflected pass to the start location
# of the next action and the outcome to "success" if the deflected
# pass reached a teammate
if next_event is not None and next_next_event is not None:
if (
next_event.type_id == EVENT_TYPE_BALL_TOUCH
and next_event.outcome == 1
and next_next_event.contestant_id == raw_event.contestant_id
):
result = PassResult.COMPLETE
receiver_coordinates = Point(
x=next_next_event.x,
y=next_next_event.y,
)

return dict(
result=result,
receiver_coordinates=receiver_coordinates,
Expand Down Expand Up @@ -692,11 +709,16 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
f"Unknown team_id {raw_event.contestant_id}"
)

next_event_elm = (
next_event = (
raw_events[idx + 1]
if (idx + 1) < len(raw_events)
else None
)
next_next_event = (
raw_events[idx + 2]
if (idx + 2) < len(raw_events)
else None
)
period = next(
(
period
Expand Down Expand Up @@ -726,6 +748,15 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
# not started yet
continue

if raw_event.contestant_id == teams[0].team_id:
team = teams[0]
elif raw_event.contestant_id == teams[1].team_id:
team = teams[1]
else:
raise DeserializationError(
f"Unknown team_id {raw_event.contestant_id}"
)

player = None
if raw_event.player_id is not None:
player = team.get_player_by_id(raw_event.player_id)
Expand Down Expand Up @@ -753,7 +784,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
)

if raw_event.type_id == EVENT_TYPE_PASS:
pass_event_kwargs = _parse_pass(raw_event)
pass_event_kwargs = _parse_pass(
raw_event, next_event, next_next_event
)
event = self.event_factory.build_pass(
**pass_event_kwargs,
**generic_event_kwargs,
Expand Down Expand Up @@ -815,7 +848,7 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
EVENT_TYPE_BLOCKED_PASS,
):
interception_event_kwargs = _parse_interception(
raw_event, team, next_event_elm
raw_event, team, next_event
)
event = self.event_factory.build_interception(
**interception_event_kwargs,
Expand Down Expand Up @@ -878,7 +911,7 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
timedelta(0), generic_event_kwargs["timestamp"]
)
substitution_event_kwargs = _parse_substitution(
next_event_elm, team
next_event, team
)
event = self.event_factory.build_substitution(
result=None,
Expand Down
Loading

0 comments on commit c2002ce

Please sign in to comment.