Skip to content

Commit

Permalink
Statsbomb: even more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
koenvossen committed Jun 2, 2020
1 parent 351156d commit 201e9bd
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 9 deletions.
33 changes: 31 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ from kloppy import (
load_metrica_tracking_data,
load_tracab_tracking_data,
load_epts_tracking_data,
load_statsbomb_event_data,
to_pandas,
transform
)
Expand All @@ -53,6 +54,10 @@ dataset = load_tracab_tracking_data('meta.xml', 'raw_data.txt')
# or epts
dataset = load_epts_tracking_data('meta.xml', 'raw_data.txt')

# or event data
dataset = load_statsbomb_event_data('event_data.json', 'lineup.json')


dataset = transform(dataset, pitch_dimensions=[[0, 108], [-34, 34]])
pandas_data_frame = to_pandas(dataset)
```
Expand Down Expand Up @@ -143,6 +148,30 @@ with open("raw_data.txt", "rb") as raw, \
```


or StatsBomb event data
```python
from kloppy import StatsBombSerializer

serializer = StatsBombSerializer()

with open("events/123123.json", "rb") as event_data, \
open("lineup/123123.json", "rb") as lineup_data:

dataset = serializer.deserialize(
inputs={
'event_data': event_data,
'lineup_data': lineup_data
},
options={
"event_types": ["pass", "shot", "carry", "take_on"]
}
)

# start working with dataset
```



### <a name="pitch-dimensions"></a>Transform the pitch dimensions
Data providers use their own pitch dimensions. Some use actual meters while others use 100x100. Use the Transformer to convert from one set of pitch dimensions to another.
```python
Expand Down Expand Up @@ -195,7 +224,7 @@ Data models
- [ ] Automated tests
- [x] Pitch
- [x] Tracking
- [ ] Event
- [x] Event

Tracking data (de)serializers
- [x] Automated tests
Expand All @@ -207,7 +236,7 @@ Tracking data (de)serializers
Event data (de)serializers
- [ ] Automated tests
- [ ] OPTA
- [ ] StatsBomb
- [x] StatsBomb
- [ ] MetricaSports

Transformers
Expand Down
6 changes: 6 additions & 0 deletions examples/datasets/metrica.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
import sys

from kloppy import datasets, to_pandas


Expand All @@ -6,6 +9,9 @@ def main():
This example shows the use of Metrica datasets, and how we can pass arguments
to the dataset loader.
"""
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")


# The metrica dataset loader loads by default the 'game1' dataset
dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10})
Expand Down
8 changes: 7 additions & 1 deletion examples/datasets/statsbomb.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import sys

from kloppy import datasets, transform, to_pandas
from kloppy import datasets, transform, to_pandas, load_statsbomb_event_data
from kloppy.infra.utils import performance_logging


Expand Down Expand Up @@ -32,6 +32,12 @@ def main():

print(dataframe[:100].to_string())

# or load it using the helper from disk
dataset = load_statsbomb_event_data(
"events/15946.json",
"lineups/15946.json"
)


if __name__ == "__main__":
main()
6 changes: 6 additions & 0 deletions examples/epts/load_epts_into_pandas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
import sys

from pandas import DataFrame

from kloppy.infra.serializers.tracking.epts.meta_data import load_meta_data as epts_load_meta_data
Expand All @@ -17,6 +20,9 @@ def main():
4. Try to consume items from generator twice
5. Convert the records into a pandas dataframe for easy data wrangling
"""
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")


# step 1: load metadata
with open("epts_meta.xml", "rb") as meta_fp:
Expand Down
4 changes: 4 additions & 0 deletions examples/playing_time.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging
import sys
from collections import Counter

from kloppy import datasets
Expand All @@ -8,6 +10,8 @@ def main():
"""
This example shows how to determine playing time
"""
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./25})

Expand Down
11 changes: 6 additions & 5 deletions kloppy/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, opt
)


def load_statsbomb_event_data(lineup_filename: str, raw_data_filename: str, options: dict = None) -> EventDataset:
def load_statsbomb_event_data(event_data_filename: str, lineup_data_filename: str, options: dict = None) -> EventDataset:
serializer = StatsBombSerializer()
with open(lineup_filename, "rb") as lineup_data, \
open(raw_data_filename, "rb") as raw_data:
with open(event_data_filename, "rb") as event_data, \
open(lineup_data_filename, "rb") as lineup_data:

return serializer.deserialize(
inputs={
'lineup_data': lineup_data,
'raw_data': raw_data
'event_data': event_data,
'lineup_data': lineup_data
},
options=options
)
Expand Down Expand Up @@ -166,6 +166,7 @@ def to_pandas(dataset: Dataset, _record_converter: Callable = None) -> 'DataFram
'load_tracab_tracking_data',
'load_metrica_tracking_data',
'load_epts_tracking_data',
'load_statsbomb_event_data',
'to_pandas',
'transform'
]
2 changes: 2 additions & 0 deletions kloppy/infra/serializers/event/statsbomb/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Even
>>> )
"""
self.__validate_inputs(inputs)
if not options:
options = {}

with performance_logging("load data", logger=logger):
raw_events = json.load(inputs['event_data'])
Expand Down
2 changes: 1 addition & 1 deletion kloppy/infra/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def performance_logging(description: str, counter: int = None, logger=None):
extra = f" ({int(counter / took * 1000)}items/sec)"

unit = "ms"
if took < 0.01:
if took < 0.1:
took *= 1000
unit = "us"

Expand Down

0 comments on commit 201e9bd

Please sign in to comment.