Skip to content

Commit

Permalink
Merge pull request #26 from PySport/opta-serializer
Browse files Browse the repository at this point in the history
Opta serializer
  • Loading branch information
koenvo authored Jun 18, 2020
2 parents 5882aca + 5222fb4 commit 13f3a73
Show file tree
Hide file tree
Showing 16 changed files with 775 additions and 27 deletions.
6 changes: 5 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,8 @@ v0.5.1, 2020-06-13 -- Add stats in json/text format to kloppy-query
v0.5.2, 2020-06-13 -- Fix Transformer when ball position is not set (@benoitblanc)
Fix for working with periods in EPTS Serializer (@bdagnino)
v0.5.3, 2020-06-16 -- Add code formatting and contributing guide (@dmallory42)
Add support for python 3.6
Add support for python 3.6
v0.6.0, 2020-06-18 -- Add Opta event serializer
Fix for event pattern matching for nested captures
Fix for event pattern matching when multiple paths can match
Improved ball_recovery example
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Here are just a few of the things that kloppy does well:
- Out-of-the-box [**(De)serializing**](#serializing) tracking- and event data from different sources into standardized models and vice versa
- Flexible [**pitch dimensions**](#pitch-dimensions) transformer for changing a dataset pitch dimensions from one to another (eg OPTA's 100x100 -> TRACAB meters)
- Intelligent [**orientation**](#orientation) transforming orientation of a dataset (eg from TRACAB fixed orientation to "Home Team" orientation)
- Search for [**complex patterns**](examples/pattern_matching/repository/README.md) in event data.


## Where to get it
Expand Down
36 changes: 31 additions & 5 deletions examples/pattern_matching/ball_recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,40 @@ def main():
timestamp=pm.function(
lambda timestamp, last_pass_of_team_a_timestamp: timestamp
- last_pass_of_team_a_timestamp
< 10
< 15
),
capture="recover",
)
+ (
pm.match_pass(
success=True, team=pm.same_as("last_pass_of_team_a.team")
# resulted in possession after 5 seconds
pm.group(
pm.match_pass(
success=True,
team=pm.same_as("recover.team"),
timestamp=pm.function(
lambda timestamp, recover_timestamp, **kwargs: timestamp
- recover_timestamp
< 5
),
)
* slice(None, None)
+ pm.match_pass(
success=True,
team=pm.same_as("recover.team"),
timestamp=pm.function(
lambda timestamp, recover_timestamp, **kwargs: timestamp
- recover_timestamp
> 5
),
)
)
| pm.group(
pm.match_pass(
success=True, team=pm.same_as("recover.team")
)
* slice(None, None)
+ pm.match_shot(team=pm.same_as("recover.team"))
)
| pm.match_shot(team=pm.same_as("last_pass_of_team_a.team"))
),
capture="success",
)
Expand All @@ -77,7 +103,7 @@ def main():
success = "success" in match.captures

if success:
print(match)
print(team, match.events[0].timestamp)

counter.update(
{f"{team}_total": 1, f"{team}_success": 1 if success else 0}
Expand Down
33 changes: 29 additions & 4 deletions examples/pattern_matching/repository/ball_recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,39 @@
timestamp=pm.function(
lambda timestamp, last_pass_of_team_a_timestamp: timestamp
- last_pass_of_team_a_timestamp
< 10
< 15
),
capture="recover",
)
+ (
pm.match_pass(
success=True, team=pm.same_as("last_pass_of_team_a.team")
pm.group(
pm.match_pass(
success=True,
team=pm.same_as("recover.team"),
timestamp=pm.function(
lambda timestamp, recover_timestamp, **kwargs: timestamp
- recover_timestamp
< 5
),
)
* slice(None, None)
+ pm.match_pass(
success=True,
team=pm.same_as("recover.team"),
timestamp=pm.function(
lambda timestamp, recover_timestamp, **kwargs: timestamp
- recover_timestamp
> 5
),
)
)
| pm.group(
pm.match_pass(
success=True, team=pm.same_as("recover.team")
)
* slice(None, None)
+ pm.match_shot(team=pm.same_as("recover.team"))
)
| pm.match_shot(team=pm.same_as("last_pass_of_team_a.team"))
),
capture="success",
)
Expand Down
51 changes: 43 additions & 8 deletions kloppy/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@

from xml.etree import ElementTree as ET

from kloppy import load_statsbomb_event_data, event_pattern_matching as pm
from kloppy import (
load_statsbomb_event_data,
load_opta_event_data,
event_pattern_matching as pm,
)
from kloppy.infra.utils import performance_logging

sys.path.append(".")
Expand Down Expand Up @@ -44,11 +48,17 @@ def write_to_xml(video_fragments: List[VideoFragment], filename):
tree.write(filename, xml_declaration=True, encoding="utf-8", method="xml")


def _format_time(seconds: float) -> str:
    """Format a number of seconds as a zero-padded ``MM:SS`` string.

    The value is rounded to the nearest whole second *before* it is split
    into minutes and seconds. Rounding only the seconds part after the
    split would let it round up to 60 without carrying into the minutes
    (for example 59.6 would be rendered as ``00:60`` instead of ``01:00``).

    Args:
        seconds: Elapsed time in seconds.

    Returns:
        The time as ``MM:SS``; minutes are not wrapped into hours, so
        values of 100 minutes or more simply use more digits.
    """
    total_seconds = int(round(seconds))
    minutes, remainder = divmod(total_seconds, 60)
    return f"{minutes:02d}:{remainder:02d}"


def print_match(id_: int, match, success: bool, label):
print(f"Match {id_}: {label} {'SUCCESS' if success else 'no-success'}")
for event in match.events:
time = _format_time(event.timestamp)
print(
f"{event.event_id} {event.event_type} {str(event.result).ljust(10)} / {event.period.id}: {event.timestamp:.3f} / {event.team} {str(event.player_jersey_no).rjust(2)} / {event.position.x}x{event.position.y}"
f"{event.event_id} {event.event_type} {str(event.result).ljust(10)} / P{event.period.id} {time} / {event.team} {str(event.player_jersey_no).rjust(2)} / {event.position.x}x{event.position.y}"
)
print("")

Expand All @@ -69,6 +79,9 @@ def run_query(argv=sys.argv[1:]):
"--input-statsbomb",
help="StatsBomb event input files (events.json,lineup.json)",
)
parser.add_argument(
"--input-opta", help="Opta event input files (f24.xml,f7.xml)",
)
parser.add_argument("--output-xml", help="Output file")
parser.add_argument(
"--with-success",
Expand Down Expand Up @@ -96,6 +109,12 @@ def run_query(argv=sys.argv[1:]):
help="Show events for each match",
action="store_true",
)
parser.add_argument(
"--only-success",
default=False,
help="Only show/output success cases",
action="store_true",
)

logger = logging.getLogger("run_query")
logging.basicConfig(
Expand All @@ -117,6 +136,14 @@ def run_query(argv=sys.argv[1:]):
lineup_filename.strip(),
options={"event_types": query.event_types},
)
if opts.input_opta:
with performance_logging("load dataset", logger=logger):
f24_filename, f7_filename = opts.input_opta.split(",")
dataset = load_opta_event_data(
f24_filename.strip(),
f7_filename.strip(),
options={"event_types": query.event_types},
)

if not dataset:
raise Exception("You have to specify a dataset.")
Expand All @@ -134,22 +161,30 @@ def run_query(argv=sys.argv[1:]):
{f"{team}_total": 1, f"{team}_success": 1 if success else 0}
)

if opts.show_events:
should_process = not opts.only_success or success
if opts.show_events and should_process:
print_match(i, match, success, str(team))

if opts.output_xml:
if opts.output_xml and should_process:
relative_period_start = 0
for period in dataset.periods:
if period == match.events[0].period:
break
else:
relative_period_start += period.duration

label = str(team)
if opts.with_success and success:
label += " success"

start_timestamp = (
match.events[0].timestamp
+ match.events[0].period.start_timestamp
relative_period_start
+ match.events[0].timestamp
- opts.prepend_time
)
end_timestamp = (
match.events[-1].timestamp
+ match.events[-1].period.start_timestamp
relative_period_start
+ match.events[-1].timestamp
+ opts.append_time
)

Expand Down
4 changes: 4 additions & 0 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def attacking_direction_set(self):
def set_attacking_direction(self, attacking_direction: AttackingDirection):
self.attacking_direction = attacking_direction

@property
def duration(self):
    """Length of this span: ``end_timestamp`` minus ``start_timestamp``.

    The result is expressed in whatever unit the two timestamps use.
    """
    ended_at = self.end_timestamp
    started_at = self.start_timestamp
    return ended_at - started_at


class DatasetFlag(Flag):
BALL_OWNING_TEAM = 1
Expand Down
30 changes: 24 additions & 6 deletions kloppy/domain/services/matchers/pattern/event.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
from dataclasses import dataclass
from functools import partial
from typing import Callable, Tuple, Dict, List, Iterator
Expand All @@ -12,19 +13,24 @@
)
from .regexp import *
from .regexp import _make_match, _TrailItem
from .regexp.regexp import _Match


class WithCaptureMatcher(Matcher):
def __init__(self, matcher: Callable[[Tok, Dict[str, List[Tok]]], bool]):
self.matcher = matcher

def _add_captures(self, captures: Dict[str, List[Tok]], match: _Match):
    """Flatten the named captures nested under ``match`` into ``captures``.

    For every named child of ``match``, the trail of its first entry is
    stored under that name, and the same is then done recursively for that
    entry's own children. ``captures`` is updated in place; if a name
    occurs more than once, the value written last wins.
    """
    for capture_name in match.children:
        # Only the first entry recorded under each name is considered.
        first_entry = match.children[capture_name][0]
        captures[capture_name] = first_entry.trail
        self._add_captures(captures, first_entry)

def match(
self, token: Tok, trail: Tuple[_TrailItem[Out], ...]
) -> Iterator[Out]:
match = _make_match(trail)
captures = {
name: capture[0].trail for name, capture in match.children.items()
}
captures = {}
self._add_captures(captures, match)
if self.matcher(token, captures):
yield token

Expand Down Expand Up @@ -61,6 +67,7 @@ def _matcher_fn(event: Event, captures: Dict[str, List[Event]]) -> bool:
match_shot = partial(match_generic, ShotEvent)
match_carry = partial(match_generic, CarryEvent)
match_take_on = partial(match_generic, TakeOnEvent)
match_any = partial(match_generic, Event)


def same_as(capture: str):
Expand Down Expand Up @@ -110,9 +117,20 @@ def search(dataset: EventDataset, pattern: Node[Tok, Out]):
re = RegExp.from_ast(pattern)

results = []
events_per_period = defaultdict(list)
for event in events:
events_per_period[event.period.id].append(event)

for period, events_ in sorted(events_per_period.items()):
# Search per period. Patterns should never match over periods
results.extend(_search(events_, re))
return results


def _search(events: List[Event], re: RegExp[Tok, Out]):
i = 0
c = len(events)
while i < c:
results = []
for i in range(len(events)):
matches = re.match(events[i:], consume_all=False)
if matches:
results.append(
Expand All @@ -128,7 +146,6 @@ def search(dataset: EventDataset, pattern: Node[Tok, Out]):
},
)
)
i += 1

return results

Expand All @@ -145,6 +162,7 @@ class Query:
"match_carry",
"match_take_on",
"match_shot",
"match_any",
"same_as",
"not_same_as",
"function",
Expand Down
15 changes: 15 additions & 0 deletions kloppy/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
MetricaTrackingSerializer,
EPTSSerializer,
StatsBombSerializer,
OptaSerializer,
)
from .domain import (
Dataset,
Expand Down Expand Up @@ -84,6 +85,19 @@ def load_statsbomb_event_data(
)


def load_opta_event_data(
    f24_data_filename: str, f7_data_filename: str, options: dict = None
) -> EventDataset:
    """Load an Opta event dataset from an F24 file and an F7 file.

    Both files are opened in binary mode and handed to
    ``OptaSerializer.deserialize`` under the input keys ``"f24_data"`` and
    ``"f7_data"``. ``options`` is passed through to the serializer
    unchanged. The files are closed once deserialization returns or raises.

    Args:
        f24_data_filename: Path of the F24 file.
        f7_data_filename: Path of the F7 file.
        options: Optional settings forwarded to the serializer.

    Returns:
        The dataset produced by the serializer.
    """
    opta_serializer = OptaSerializer()
    with open(f24_data_filename, "rb") as f24_handle:
        with open(f7_data_filename, "rb") as f7_handle:
            inputs = {"f24_data": f24_handle, "f7_data": f7_handle}
            return opta_serializer.deserialize(
                inputs=inputs, options=options
            )


DatasetType = TypeVar("DatasetType")


Expand Down Expand Up @@ -205,6 +219,7 @@ def to_pandas(
"load_metrica_tracking_data",
"load_epts_tracking_data",
"load_statsbomb_event_data",
"load_opta_event_data",
"to_pandas",
"transform",
]
2 changes: 1 addition & 1 deletion kloppy/infra/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
MetricaTrackingSerializer,
EPTSSerializer,
)
from .event import StatsBombSerializer
from .event import StatsBombSerializer, OptaSerializer

# NOT YET: from .event import EventDataSerializer, MetricaEventSerializer
1 change: 1 addition & 0 deletions kloppy/infra/serializers/event/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

# from .metrica import MetricaEventSerializer
from .statsbomb import StatsBombSerializer
from .opta import OptaSerializer
1 change: 1 addition & 0 deletions kloppy/infra/serializers/event/opta/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .serializer import OptaSerializer
Loading

0 comments on commit 13f3a73

Please sign in to comment.