diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..2483976d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.idea/
+__pycache__/
diff --git a/README.md b/README.md
index b1a9f043..36850173 100644
--- a/README.md
+++ b/README.md
@@ -47,14 +47,14 @@ from kloppy import (
)
# metrica data
-data_set = load_metrica_tracking_data('home_file.csv', 'away_file.csv')
+dataset = load_metrica_tracking_data('home_file.csv', 'away_file.csv')
# or tracab
-data_set = load_tracab_tracking_data('meta.xml', 'raw_data.txt')
+dataset = load_tracab_tracking_data('meta.xml', 'raw_data.txt')
# or epts
-data_set = load_epts_tracking_data('meta.xml', 'raw_data.txt')
+dataset = load_epts_tracking_data('meta.xml', 'raw_data.txt')
-data_set = transform(data_set, pitch_dimensions=[[0, 108], [-34, 34]])
-pandas_data_frame = to_pandas(data_set)
+dataset = transform(dataset, to_pitch_dimensions=[[0, 108], [-34, 34]])
+pandas_data_frame = to_pandas(dataset)
```
### Public datasets / Very quick start
@@ -63,7 +63,7 @@ we added a "dataset loader" which does all the heavy lifting for you: find urls,
```python
from kloppy import datasets
-data_set = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10})
+dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10})
```
### Standardized models
@@ -84,7 +84,7 @@ serializer = TRACABSerializer()
with open("tracab_data.dat", "rb") as raw, \
open("tracab_metadata.xml", "rb") as meta:
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs={
'raw_data': raw,
'meta_data': meta
@@ -94,7 +94,7 @@ with open("tracab_data.dat", "rb") as raw, \
}
)
- # start working with data_set
+ # start working with dataset
```
or Metrica data
@@ -106,7 +106,7 @@ serializer = MetricaTrackingSerializer()
with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away, \
open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home:
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs={
'raw_data_home': raw_home,
'raw_data_away': raw_away
@@ -116,7 +116,7 @@ with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away, \
}
)
- # start working with data_set
+ # start working with dataset
```
@@ -129,7 +129,7 @@ serializer = EPTSSerializer()
with open("raw_data.txt", "rb") as raw, \
open("metadata.xml", "rb") as meta:
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs={
'raw_data': raw,
'meta_data': meta
@@ -139,7 +139,7 @@ with open("raw_data.txt", "rb") as raw, \
}
)
- # start working with data_set
+ # start working with dataset
```
@@ -148,9 +148,9 @@ Data providers use their own pitch dimensions. Some use actual meters while othe
```python
from kloppy.domain import Transformer, PitchDimensions, Dimension
-# use deserialized `data_set`
-new_data_set = Transformer.transform_data_set(
- data_set,
+# use deserialized `dataset`
+new_dataset = Transformer.transform_dataset(
+ dataset,
to_pitch_dimensions=PitchDimensions(
x_dim=Dimension(0, 100),
y_dim=Dimension(0, 100)
@@ -166,8 +166,8 @@ Data providers can use different orientations. Some use a fixed orientation and
```python
from kloppy.domain import Transformer, Orientation
-new_data_set = Transformer.transform_data_set(
- data_set,
+new_dataset = Transformer.transform_dataset(
+ dataset,
to_orientation=Orientation.BALL_OWNING_TEAM
)
```
@@ -176,9 +176,9 @@ new_data_set = Transformer.transform_data_set(
```python
from kloppy.domain import Transformer, PitchDimensions, Dimension, Orientation
-# use deserialized `data_set`
-new_data_set = Transformer.transform_data_set(
- data_set,
+# use deserialized `dataset`
+new_dataset = Transformer.transform_dataset(
+ dataset,
to_pitch_dimensions=PitchDimensions(
x_dim=Dimension(0, 100),
y_dim=Dimension(0, 100)
diff --git a/examples/datasets/metrica.py b/examples/datasets/metrica.py
index d0322a6b..3564119b 100644
--- a/examples/datasets/metrica.py
+++ b/examples/datasets/metrica.py
@@ -8,13 +8,13 @@ def main():
"""
# The metrica dataset loader loads by default the 'game1' dataset
- data_set = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10})
- print(len(data_set.frames))
+ dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10})
+ print(len(dataset.frames))
# We can pass additional keyword arguments to the loaders to specify a different dataset
- data_set = datasets.load("metrica_tracking", options={'limit': 1000}, game='game2')
+ dataset = datasets.load("metrica_tracking", options={'limit': 1000}, game='game2')
- data_frame = to_pandas(data_set)
+ data_frame = to_pandas(dataset)
print(data_frame)
diff --git a/examples/playing_time.py b/examples/playing_time.py
index 7b352c83..746e6034 100644
--- a/examples/playing_time.py
+++ b/examples/playing_time.py
@@ -9,10 +9,10 @@ def main():
This example shows how to determine playing time
"""
- data_set = datasets.load("metrica_tracking", options={'sample_rate': 1./25})
+ dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./25})
playing_seconds_per_player = Counter()
- for frame in data_set.frames:
+ for frame in dataset.frames:
playing_seconds_per_player.update([int(jersey_no) for jersey_no in frame.home_team_player_positions.keys()])
x = range(len(playing_seconds_per_player))
diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py
index 2f345dcc..03a9bf53 100644
--- a/kloppy/domain/models/common.py
+++ b/kloppy/domain/models/common.py
@@ -93,7 +93,7 @@ def set_attacking_direction(self, attacking_direction: AttackingDirection):
self.attacking_direction = attacking_direction
-class DataSetFlag(Flag):
+class DatasetFlag(Flag):
BALL_OWNING_TEAM = 1
BALL_STATE = 2
@@ -108,8 +108,8 @@ class DataRecord(ABC):
@dataclass
-class DataSet(ABC):
- flags: DataSetFlag
+class Dataset(ABC):
+ flags: DatasetFlag
pitch_dimensions: PitchDimensions
orientation: Orientation
periods: List[Period]
diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py
index 02035e70..b5cf33ae 100644
--- a/kloppy/domain/models/event.py
+++ b/kloppy/domain/models/event.py
@@ -6,7 +6,7 @@
from typing import List, Union
from .pitch import Point
-from .common import DataRecord, DataSet, Team
+from .common import DataRecord, Dataset, Team
class SubType(Enum):
@@ -226,7 +226,7 @@ def event_type(self) -> EventType:
@dataclass
-class EventDataSet(DataSet):
+class EventDataset(Dataset):
records: List[Union[
SetPieceEvent, ShotEvent
]]
diff --git a/kloppy/domain/models/tracking.py b/kloppy/domain/models/tracking.py
index 59bf5280..01d35ed1 100644
--- a/kloppy/domain/models/tracking.py
+++ b/kloppy/domain/models/tracking.py
@@ -2,7 +2,7 @@
from typing import List, Dict
from .common import (
- DataSet,
+ Dataset,
DataRecord
)
from .pitch import Point
@@ -17,7 +17,7 @@ class Frame(DataRecord):
@dataclass
-class TrackingDataSet(DataSet):
+class TrackingDataset(Dataset):
frame_rate: int
records: List[Frame]
diff --git a/kloppy/domain/services/enrichers/__init__.py b/kloppy/domain/services/enrichers/__init__.py
index 8d6e9919..594ca0d3 100644
--- a/kloppy/domain/services/enrichers/__init__.py
+++ b/kloppy/domain/services/enrichers/__init__.py
@@ -1,12 +1,12 @@
from dataclasses import dataclass
-from ...models.tracking import DataSet as TrackingDataSet
-from ...models.event import DataSet as EventDataSet
-from ...models.common import DataSetFlag, Team, BallState
+from ...models.tracking import Dataset as TrackingDataset
+from ...models.event import Dataset as EventDataset
+from ...models.common import DatasetFlag, Team, BallState
class TrackingPossessionEnricher:
- def enrich_inplace(self, tracking_data_set: TrackingDataSet, event_data_set: EventDataSet) -> None:
+ def enrich_inplace(self, tracking_dataset: TrackingDataset, event_dataset: EventDataset) -> None:
"""
Return an enriched tracking data set.
@@ -16,20 +16,20 @@ def enrich_inplace(self, tracking_data_set: TrackingDataSet, event_data_set: Eve
they happen.
"""
- if tracking_data_set.flags & (DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE):
+ if tracking_dataset.flags & (DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE):
return
- if not event_data_set.flags & (DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE):
- raise Exception("Event DataSet does not contain ball owning team or ball state information")
+ if not event_dataset.flags & (DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE):
+ raise Exception("Event Dataset does not contain ball owning team or ball state information")
# set some defaults
next_event_idx = 0
current_ball_owning_team = None
current_ball_state = None
- for frame in tracking_data_set.records:
- if next_event_idx < len(event_data_set.records):
- event = event_data_set.records[next_event_idx]
+ for frame in tracking_dataset.records:
+ if next_event_idx < len(event_dataset.records):
+ event = event_dataset.records[next_event_idx]
if frame.period.id == event.period.id and frame.timestamp >= event.timestamp:
current_ball_owning_team = event.ball_owning_team
current_ball_state = event.ball_state
diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py
index a7b79b24..9fb772f5 100644
--- a/kloppy/domain/services/transformers/__init__.py
+++ b/kloppy/domain/services/transformers/__init__.py
@@ -7,7 +7,7 @@
Frame,
Team, AttackingDirection,
- TrackingDataSet, DataSetFlag, DataSet, # NOT YET: EventDataSet
+ TrackingDataset, DatasetFlag, Dataset, # NOT YET: EventDataset
)
@@ -78,43 +78,43 @@ def transform_frame(self, frame: Frame) -> Frame:
}
)
- DataSetType = TypeVar('DataSetType')
+ DatasetType = TypeVar('DatasetType')
@classmethod
- def transform_data_set(cls,
- data_set: DataSetType,
+ def transform_dataset(cls,
+ dataset: DatasetType,
to_pitch_dimensions: PitchDimensions = None,
- to_orientation: Orientation = None) -> DataSetType:
+ to_orientation: Orientation = None) -> DatasetType:
if not to_pitch_dimensions and not to_orientation:
- return data_set
+ return dataset
elif not to_orientation:
- to_orientation = data_set.orientation
+ to_orientation = dataset.orientation
elif not to_pitch_dimensions:
- to_pitch_dimensions = data_set.pitch_dimensions
+ to_pitch_dimensions = dataset.pitch_dimensions
if to_orientation == Orientation.BALL_OWNING_TEAM:
- if not data_set.flags & DataSetFlag.BALL_OWNING_TEAM:
+ if not dataset.flags & DatasetFlag.BALL_OWNING_TEAM:
raise ValueError("Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain "
"ball owning team data")
transformer = cls(
- from_pitch_dimensions=data_set.pitch_dimensions,
- from_orientation=data_set.orientation,
+ from_pitch_dimensions=dataset.pitch_dimensions,
+ from_orientation=dataset.orientation,
to_pitch_dimensions=to_pitch_dimensions,
to_orientation=to_orientation
)
- if isinstance(data_set, TrackingDataSet):
- frames = list(map(transformer.transform_frame, data_set.records))
+ if isinstance(dataset, TrackingDataset):
+ frames = list(map(transformer.transform_frame, dataset.records))
- return TrackingDataSet(
- flags=data_set.flags,
- frame_rate=data_set.frame_rate,
- periods=data_set.periods,
+ return TrackingDataset(
+ flags=dataset.flags,
+ frame_rate=dataset.frame_rate,
+ periods=dataset.periods,
pitch_dimensions=to_pitch_dimensions,
orientation=to_orientation,
records=frames
)
- #elif isinstance(data_set, EventDataSet):
- # raise Exception("EventDataSet transformer not implemented yet")
+ #elif isinstance(dataset, EventDataset):
+ # raise Exception("EventDataset transformer not implemented yet")
else:
- raise Exception("Unknown DataSet type")
+ raise Exception("Unknown Dataset type")
diff --git a/kloppy/helpers.py b/kloppy/helpers.py
index a48176f6..eb65611d 100644
--- a/kloppy/helpers.py
+++ b/kloppy/helpers.py
@@ -1,10 +1,10 @@
from typing import Callable, TypeVar
from . import TRACABSerializer, MetricaTrackingSerializer, EPTSSerializer
-from .domain import DataSet, Frame, TrackingDataSet, Transformer, Orientation, PitchDimensions, Dimension
+from .domain import Dataset, Frame, TrackingDataset, Transformer, Orientation, PitchDimensions, Dimension
-def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> DataSet:
+def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> Dataset:
serializer = TRACABSerializer()
with open(meta_data_filename, "rb") as meta_data, \
open(raw_data_filename, "rb") as raw_data:
@@ -18,7 +18,7 @@ def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, o
)
-def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filename: str, options: dict = None) -> DataSet:
+def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filename: str, options: dict = None) -> Dataset:
serializer = MetricaTrackingSerializer()
with open(raw_data_home_filename, "rb") as raw_data_home, \
open(raw_data_away_filename, "rb") as raw_data_away:
@@ -32,7 +32,7 @@ def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filena
)
-def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> DataSet:
+def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> Dataset:
serializer = EPTSSerializer()
with open(meta_data_filename, "rb") as meta_data, \
open(raw_data_filename, "rb") as raw_data:
@@ -46,10 +46,10 @@ def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, opt
)
-DataSetType = TypeVar('DataSetType')
+DatasetType = TypeVar('DatasetType')
-def transform(data_set: DataSetType, to_orientation=None, to_pitch_dimensions=None) -> DataSetType:
+def transform(dataset: DatasetType, to_orientation=None, to_pitch_dimensions=None) -> DatasetType:
if to_orientation and isinstance(to_orientation, str):
to_orientation = Orientation[to_orientation]
if to_pitch_dimensions and (isinstance(to_pitch_dimensions, list) or isinstance(to_pitch_dimensions, tuple)):
@@ -57,8 +57,8 @@ def transform(data_set: DataSetType, to_orientation=None, to_pitch_dimensions=No
x_dim=Dimension(*to_pitch_dimensions[0]),
y_dim=Dimension(*to_pitch_dimensions[1])
)
- return Transformer.transform_data_set(
- data_set=data_set,
+ return Transformer.transform_dataset(
+ dataset=dataset,
to_orientation=to_orientation,
to_pitch_dimensions=to_pitch_dimensions
)
@@ -83,11 +83,11 @@ def _frame_to_pandas_row_converter(frame: Frame) -> dict:
f'player_away_{jersey_no}_x': position.x,
f'player_away_{jersey_no}_y': position.y
})
-
+
return row
-def to_pandas(data_set: DataSet, _record_converter: Callable = None) -> 'DataFrame':
+def to_pandas(dataset: Dataset, _record_converter: Callable = None) -> 'DataFrame':
try:
import pandas as pd
except ImportError:
@@ -95,13 +95,13 @@ def to_pandas(data_set: DataSet, _record_converter: Callable = None) -> 'DataFra
" install it using: pip install pandas")
if not _record_converter:
- if isinstance(data_set, TrackingDataSet):
+ if isinstance(dataset, TrackingDataset):
_record_converter = _frame_to_pandas_row_converter
else:
raise Exception("Unknown dataset type")
return pd.DataFrame.from_records(
- map(_record_converter, data_set.records)
+ map(_record_converter, dataset.records)
)
diff --git a/kloppy/infra/datasets/core/builder.py b/kloppy/infra/datasets/core/builder.py
index 84e074ab..93866f3b 100644
--- a/kloppy/infra/datasets/core/builder.py
+++ b/kloppy/infra/datasets/core/builder.py
@@ -7,7 +7,7 @@
class DatasetBuilder(metaclass=RegisteredDataset):
@abstractmethod
- def get_data_set_files(self, **kwargs) -> Dict[str, Dict[str, str]]:
+ def get_dataset_urls(self, **kwargs) -> Dict[str, Dict[str, str]]:
raise NotImplementedError
@abstractmethod
diff --git a/kloppy/infra/datasets/core/loading.py b/kloppy/infra/datasets/core/loading.py
index c0005145..260218d9 100644
--- a/kloppy/infra/datasets/core/loading.py
+++ b/kloppy/infra/datasets/core/loading.py
@@ -4,7 +4,7 @@
from typing import Dict, Union
-from kloppy.domain import DataSet, TrackingDataSet
+from kloppy.domain import TrackingDataset
from .registered import _DATASET_REGISTRY
@@ -17,12 +17,12 @@ def download_file(url, local_filename):
f.write(chunk)
-def get_local_files(data_set_name: str, files: Dict[str, str]) -> Dict[str, str]:
+def get_local_files(dataset_name: str, files: Dict[str, str]) -> Dict[str, str]:
datasets_base_dir = os.environ.get('KLOPPY_BASE_DIR', None)
if not datasets_base_dir:
datasets_base_dir = os.path.expanduser('~/kloppy_datasets')
- dataset_base_dir = f'{datasets_base_dir}/{data_set_name}'
+ dataset_base_dir = f'{datasets_base_dir}/{dataset_name}'
if not os.path.exists(dataset_base_dir):
os.makedirs(dataset_base_dir)
@@ -38,15 +38,15 @@ def get_local_files(data_set_name: str, files: Dict[str, str]) -> Dict[str, str]
return local_files
-def load(data_set_name: str, options=None, **dataset_kwargs) -> Union[TrackingDataSet]:
- if data_set_name not in _DATASET_REGISTRY:
- raise ValueError(f"Dataset {data_set_name} not found")
+def load(dataset_name: str, options=None, **dataset_kwargs) -> Union[TrackingDataset]:
+ if dataset_name not in _DATASET_REGISTRY:
+ raise ValueError(f"Dataset {dataset_name} not found")
- builder_cls = _DATASET_REGISTRY[data_set_name]
+ builder_cls = _DATASET_REGISTRY[dataset_name]
builder = builder_cls()
- dataset_remote_files = builder.get_data_set_files(**dataset_kwargs)
- dataset_local_files = get_local_files(data_set_name, dataset_remote_files)
+ dataset_urls = builder.get_dataset_urls(**dataset_kwargs)
+ dataset_local_files = get_local_files(dataset_name, dataset_urls)
file_handlers = {
local_file_key: open(local_file_name, 'rb')
@@ -57,11 +57,11 @@ def load(data_set_name: str, options=None, **dataset_kwargs) -> Union[TrackingDa
try:
serializer_cls = builder.get_serializer_cls()
serializer = serializer_cls()
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs=file_handlers,
options=options
)
finally:
for fp in file_handlers.values():
fp.close()
- return data_set
+ return dataset
diff --git a/kloppy/infra/datasets/tracking/metrica.py b/kloppy/infra/datasets/tracking/metrica.py
index a296bb2c..944c2fb8 100644
--- a/kloppy/infra/datasets/tracking/metrica.py
+++ b/kloppy/infra/datasets/tracking/metrica.py
@@ -17,7 +17,7 @@
class MetricaTracking(DatasetBuilder):
- def get_data_set_files(self,**kwargs) -> Dict[str, str]:
+ def get_dataset_urls(self, **kwargs) -> Dict[str, str]:
game = kwargs.get('game', 'game1')
return _DATASET_URLS[game]
diff --git a/kloppy/infra/serializers/event/base.py b/kloppy/infra/serializers/event/base.py
index 16d4af69..271f8183 100644
--- a/kloppy/infra/serializers/event/base.py
+++ b/kloppy/infra/serializers/event/base.py
@@ -2,14 +2,14 @@
from typing import Tuple, Dict
from kloppy.infra.utils import Readable
-from kloppy.domain import EventDataSet
+from kloppy.domain import EventDataset
class EventDataSerializer(ABC):
@abstractmethod
- def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataSet:
+ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
raise NotImplementedError
@abstractmethod
- def serialize(self, data_set: EventDataSet) -> Tuple[str, str]:
+ def serialize(self, dataset: EventDataset) -> Tuple[str, str]:
raise NotImplementedError
diff --git a/kloppy/infra/serializers/event/metrica/serializer.py b/kloppy/infra/serializers/event/metrica/serializer.py
index 252a8b58..b0025054 100644
--- a/kloppy/infra/serializers/event/metrica/serializer.py
+++ b/kloppy/infra/serializers/event/metrica/serializer.py
@@ -3,8 +3,8 @@
import csv
from kloppy.domain import (
- EventDataSet, Team, Point, Period, Orientation,
- DataSetFlag, PitchDimensions, Dimension,
+ EventDataset, Team, Point, Period, Orientation,
+ DatasetFlag, PitchDimensions, Dimension,
AttackingDirection, BallState
)
from kloppy.domain.models.event import (
@@ -80,7 +80,7 @@ def __reduce_game_state(self, event: Event, game_state: __GameState) -> __GameSt
return new_state if new_state else game_state
- def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataSet:
+ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
self.__validate_inputs(inputs)
periods = []
@@ -240,8 +240,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Even
Orientation.FIXED_AWAY_HOME
)
- return EventDataSet(
- flags=DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM,
+ return EventDataset(
+ flags=DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM,
orientation=orientation,
pitch_dimensions=PitchDimensions(
x_dim=Dimension(0, 1),
@@ -251,5 +251,5 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Even
records=events
)
- def serialize(self, data_set: EventDataSet) -> Tuple[str, str]:
+ def serialize(self, dataset: EventDataset) -> Tuple[str, str]:
raise NotImplementedError
diff --git a/kloppy/infra/serializers/tracking/base.py b/kloppy/infra/serializers/tracking/base.py
index 237382ed..e3b942ce 100644
--- a/kloppy/infra/serializers/tracking/base.py
+++ b/kloppy/infra/serializers/tracking/base.py
@@ -2,14 +2,14 @@
from typing import Tuple, Dict
from kloppy.infra.utils import Readable
-from kloppy.domain import DataSet
+from kloppy.domain import Dataset
class TrackingDataSerializer(ABC):
@abstractmethod
- def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> DataSet:
+ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Dataset:
raise NotImplementedError
@abstractmethod
- def serialize(self, data_set: DataSet) -> Tuple[str, str]:
+ def serialize(self, dataset: Dataset) -> Tuple[str, str]:
raise NotImplementedError
diff --git a/kloppy/infra/serializers/tracking/epts/serializer.py b/kloppy/infra/serializers/tracking/epts/serializer.py
index 5e013801..8b751119 100644
--- a/kloppy/infra/serializers/tracking/epts/serializer.py
+++ b/kloppy/infra/serializers/tracking/epts/serializer.py
@@ -1,7 +1,7 @@
from typing import Tuple, Dict
from kloppy.domain import (
- TrackingDataSet, DataSetFlag,
+ TrackingDataset, DatasetFlag,
AttackingDirection,
Frame,
Point,
@@ -61,9 +61,9 @@ def _frame_from_row(row: dict, meta_data: EPTSMetaData) -> Frame:
ball_position=Point(x=row['ball_x'], y=row['ball_y'])
)
- def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet:
+ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
"""
- Deserialize EPTS tracking data into a `TrackingDataSet`.
+ Deserialize EPTS tracking data into a `TrackingDataset`.
Parameters
----------
@@ -78,7 +78,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
frames that will be returned.
Returns
-------
- data_set : TrackingDataSet
+ dataset : TrackingDataset
Raises
------
-
@@ -91,7 +91,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
>>> serializer = EPTSSerializer()
>>> with open("metadata.xml", "rb") as meta, \
>>> open("raw.dat", "rb") as raw:
- >>> data_set = serializer.deserialize(
+ >>> dataset = serializer.deserialize(
>>> inputs={
>>> 'meta_data': meta,
>>> 'raw_data': raw
@@ -141,8 +141,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
Orientation.FIXED_AWAY_HOME
) if start_attacking_direction else None
- return TrackingDataSet(
- flags=~(DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM),
+ return TrackingDataset(
+ flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
frame_rate=meta_data.frame_rate,
orientation=orientation,
pitch_dimensions=meta_data.pitch_dimensions,
@@ -150,6 +150,6 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
records=frames
)
- def serialize(self, data_set: TrackingDataSet) -> Tuple[str, str]:
+ def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]:
raise NotImplementedError
diff --git a/kloppy/infra/serializers/tracking/metrica.py b/kloppy/infra/serializers/tracking/metrica.py
index 533a8dcd..4dcd4350 100644
--- a/kloppy/infra/serializers/tracking/metrica.py
+++ b/kloppy/infra/serializers/tracking/metrica.py
@@ -2,7 +2,7 @@
from typing import Tuple, Dict, Iterator
from kloppy.domain import (attacking_direction_from_frame,
- TrackingDataSet,
+ TrackingDataset,
AttackingDirection,
Frame,
Point,
@@ -10,7 +10,7 @@
Orientation,
PitchDimensions,
Dimension,
- DataSetFlag)
+ DatasetFlag)
from kloppy.infra.utils import Readable, performance_logging
from . import TrackingDataSerializer
@@ -97,9 +97,9 @@ def __validate_partials(home_partial_frame: __PartialFrame, away_partial_frame:
if away_partial_frame.team != 'Away':
raise ValueError("raw_data_away contains home team data")
- def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet:
+ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
"""
- Deserialize Metrica tracking data into a `TrackingDataSet`.
+ Deserialize Metrica tracking data into a `TrackingDataset`.
Parameters
----------
@@ -114,7 +114,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
frames that will be returned.
Returns
-------
- data_set : TrackingDataSet
+ dataset : TrackingDataset
Raises
------
ValueError when both input files don't seem to belong to each other
@@ -128,7 +128,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
>>> with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_home, \
>>> open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_away:
>>>
- >>> data_set = serializer.deserialize(
+ >>> dataset = serializer.deserialize(
>>> inputs={
>>> 'raw_data_home': raw_home,
>>> 'raw_data_away': raw_away
@@ -198,8 +198,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
Orientation.FIXED_AWAY_HOME
)
- return TrackingDataSet(
- flags=~(DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM),
+ return TrackingDataset(
+ flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
frame_rate=frame_rate,
orientation=orientation,
pitch_dimensions=PitchDimensions(
@@ -210,5 +210,5 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
records=frames
)
- def serialize(self, data_set: TrackingDataSet) -> Tuple[str, str]:
+ def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]:
raise NotImplementedError
diff --git a/kloppy/infra/serializers/tracking/tracab.py b/kloppy/infra/serializers/tracking/tracab.py
index b33872bd..1fc94b97 100644
--- a/kloppy/infra/serializers/tracking/tracab.py
+++ b/kloppy/infra/serializers/tracking/tracab.py
@@ -3,7 +3,7 @@
from lxml import objectify
from kloppy.domain import (
- TrackingDataSet, DataSetFlag,
+ TrackingDataset, DatasetFlag,
AttackingDirection,
Frame,
Point,
@@ -74,9 +74,9 @@ def __validate_inputs(inputs: Dict[str, Readable]):
if "raw_data" not in inputs:
raise ValueError("Please specify a value for 'raw_data'")
- def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet:
+ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
"""
- Deserialize TRACAB tracking data into a `TrackingDataSet`.
+ Deserialize TRACAB tracking data into a `TrackingDataset`.
Parameters
----------
@@ -92,7 +92,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
frames that will be returned.
Returns
-------
- data_set : TrackingDataSet
+ dataset : TrackingDataset
Raises
------
-
@@ -105,7 +105,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac
>>> serializer = TRACABSerializer()
>>> with open("metadata.xml", "rb") as meta, \
>>> open("raw.dat", "rb") as raw:
- >>> data_set = serializer.deserialize(
+ >>> dataset = serializer.deserialize(
>>> inputs={
>>> 'meta_data': meta,
>>> 'raw_data': raw
@@ -188,8 +188,8 @@ def _iter():
Orientation.FIXED_AWAY_HOME
)
- return TrackingDataSet(
- flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE,
+ return TrackingDataset(
+ flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
frame_rate=frame_rate,
orientation=orientation,
pitch_dimensions=PitchDimensions(
@@ -202,6 +202,6 @@ def _iter():
records=frames
)
- def serialize(self, data_set: TrackingDataSet) -> Tuple[str, str]:
+ def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]:
raise NotImplementedError
diff --git a/kloppy/tests/test_enricher.py b/kloppy/tests/test_enricher.py
index 36d797ab..c95641ee 100644
--- a/kloppy/tests/test_enricher.py
+++ b/kloppy/tests/test_enricher.py
@@ -1,4 +1,4 @@
-# from kloppy.domain import TrackingDataSet, EventDataSet, PitchDimensions, Dimension, Orientation, DataSetFlag, Period, \
+# from kloppy.domain import TrackingDataset, EventDataset, PitchDimensions, Dimension, Orientation, DatasetFlag, Period, \
# Frame, TrackingPossessionEnricher, SetPieceEvent, BallState, Team
#
#
@@ -8,8 +8,8 @@
# Period(id=1, start_timestamp=0.0, end_timestamp=10.0),
# Period(id=2, start_timestamp=15.0, end_timestamp=25.0)
# ]
-# tracking_data = TrackingDataSet(
-# flags=~(DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE),
+# tracking_data = TrackingDataset(
+# flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
# pitch_dimensions=PitchDimensions(
# x_dim=Dimension(0, 100),
# y_dim=Dimension(-50, 50)
@@ -32,8 +32,8 @@
# periods=periods
# )
#
-# event_data = EventDataSet(
-# flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE,
+# event_data = EventDataset(
+# flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
# pitch_dimensions=PitchDimensions(
# x_dim=Dimension(0, 100),
# y_dim=Dimension(-50, 50)
@@ -54,6 +54,6 @@
#
# enricher = TrackingPossessionEnricher()
# enricher.enrich_inplace(
-# tracking_data_set=tracking_data,
-# event_data_set=event_data
+# tracking_dataset=tracking_data,
+# event_dataset=event_data
# )
\ No newline at end of file
diff --git a/kloppy/tests/test_epts.py b/kloppy/tests/test_epts.py
index 9b688f43..5b86515d 100644
--- a/kloppy/tests/test_epts.py
+++ b/kloppy/tests/test_epts.py
@@ -74,17 +74,17 @@ def test_correct_deserialization(self):
with open(f'{base_dir}/files/epts_meta.xml', 'rb') as meta_data, \
open(f'{base_dir}/files/epts_raw.txt', 'rb') as raw_data:
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs={
'meta_data': meta_data,
'raw_data': raw_data
}
)
- assert len(data_set.records) == 2
- assert len(data_set.periods) == 0
- assert data_set.orientation == Orientation.FIXED_HOME_AWAY
+ assert len(dataset.records) == 2
+ assert len(dataset.periods) == 0
+ assert dataset.orientation == Orientation.FIXED_HOME_AWAY
- assert data_set.records[0].home_team_player_positions['22'] == Point(x=-769, y=-2013)
- assert data_set.records[0].away_team_player_positions == {}
- assert data_set.records[0].ball_position == Point(x=-2656, y=367)
+ assert dataset.records[0].home_team_player_positions['22'] == Point(x=-769, y=-2013)
+ assert dataset.records[0].away_team_player_positions == {}
+ assert dataset.records[0].ball_position == Point(x=-2656, y=367)
diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py
index 42c18666..41d10dab 100644
--- a/kloppy/tests/test_helpers.py
+++ b/kloppy/tests/test_helpers.py
@@ -5,8 +5,8 @@
from kloppy import to_pandas, load_metrica_tracking_data, load_tracab_tracking_data, transform
from kloppy.domain import (
- Period, DataSetFlag, Point, AttackingDirection,
- TrackingDataSet, PitchDimensions, Dimension,
+ Period, DatasetFlag, Point, AttackingDirection,
+ TrackingDataset, PitchDimensions, Dimension,
Orientation, Frame
)
@@ -14,29 +14,29 @@
class TestHelpers:
def test_load_metrica_tracking_data(self):
base_dir = os.path.dirname(__file__)
- data_set = load_metrica_tracking_data(
+ dataset = load_metrica_tracking_data(
f'{base_dir}/files/metrica_home.csv',
f'{base_dir}/files/metrica_away.csv'
)
- assert len(data_set.records) == 6
- assert len(data_set.periods) == 2
+ assert len(dataset.records) == 6
+ assert len(dataset.periods) == 2
def test_load_tracab_tracking_data(self):
base_dir = os.path.dirname(__file__)
- data_set = load_tracab_tracking_data(
+ dataset = load_tracab_tracking_data(
f'{base_dir}/files/tracab_meta.xml',
f'{base_dir}/files/tracab_raw.dat'
)
- assert len(data_set.records) == 5 # only alive=True
- assert len(data_set.periods) == 2
+ assert len(dataset.records) == 5 # only alive=True
+ assert len(dataset.periods) == 2
def _get_dataset(self):
periods = [
Period(id=1, start_timestamp=0.0, end_timestamp=10.0, attacking_direction=AttackingDirection.HOME_AWAY),
Period(id=2, start_timestamp=15.0, end_timestamp=25.0, attacking_direction=AttackingDirection.AWAY_HOME)
]
- tracking_data = TrackingDataSet(
- flags=~(DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE),
+ tracking_data = TrackingDataset(
+ flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
pitch_dimensions=PitchDimensions(
x_dim=Dimension(0, 100),
y_dim=Dimension(-50, 50)
@@ -75,14 +75,14 @@ def test_transform(self):
tracking_data = self._get_dataset()
# orientation change AND dimension scale
- transformed_data_set = transform(
+ transformed_dataset = transform(
tracking_data,
to_orientation="AWAY_TEAM",
to_pitch_dimensions=[[0, 1], [0, 1]]
)
- assert transformed_data_set.frames[0].ball_position == Point(x=0, y=1)
- assert transformed_data_set.frames[1].ball_position == Point(x=1, y=0)
+ assert transformed_dataset.frames[0].ball_position == Point(x=0, y=1)
+ assert transformed_dataset.frames[1].ball_position == Point(x=1, y=0)
def test_to_pandas(self):
tracking_data = self._get_dataset()
diff --git a/kloppy/tests/test_metrica.py b/kloppy/tests/test_metrica.py
index 4dd1fb1f..c2a3aaac 100644
--- a/kloppy/tests/test_metrica.py
+++ b/kloppy/tests/test_metrica.py
@@ -12,29 +12,29 @@ def test_correct_deserialization(self):
with open(f'{base_dir}/files/metrica_home.csv', 'rb') as raw_data_home, \
open(f'{base_dir}/files/metrica_away.csv', 'rb') as raw_data_away:
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs={
'raw_data_home': raw_data_home,
'raw_data_away': raw_data_away
}
)
- assert len(data_set.records) == 6
- assert len(data_set.periods) == 2
- assert data_set.orientation == Orientation.FIXED_HOME_AWAY
- assert data_set.periods[0] == Period(id=1, start_timestamp=0.04, end_timestamp=0.12,
+ assert len(dataset.records) == 6
+ assert len(dataset.periods) == 2
+ assert dataset.orientation == Orientation.FIXED_HOME_AWAY
+ assert dataset.periods[0] == Period(id=1, start_timestamp=0.04, end_timestamp=0.12,
attacking_direction=AttackingDirection.HOME_AWAY)
- assert data_set.periods[1] == Period(id=2, start_timestamp=5800.16, end_timestamp=5800.24,
+ assert dataset.periods[1] == Period(id=2, start_timestamp=5800.16, end_timestamp=5800.24,
attacking_direction=AttackingDirection.AWAY_HOME)
# make sure data is loaded correctly (including flip y-axis)
- assert data_set.records[0].home_team_player_positions['11'] == Point(x=0.00082, y=1 - 0.48238)
- assert data_set.records[0].away_team_player_positions['25'] == Point(x=0.90509, y=1 - 0.47462)
- assert data_set.records[0].ball_position == Point(x=0.45472, y=1 - 0.38709)
+ assert dataset.records[0].home_team_player_positions['11'] == Point(x=0.00082, y=1 - 0.48238)
+ assert dataset.records[0].away_team_player_positions['25'] == Point(x=0.90509, y=1 - 0.47462)
+ assert dataset.records[0].ball_position == Point(x=0.45472, y=1 - 0.38709)
# make sure player data is only in the frame when the player is at the pitch
- assert '14' not in data_set.records[0].home_team_player_positions
- assert '14' in data_set.records[3].home_team_player_positions
+ assert '14' not in dataset.records[0].home_team_player_positions
+ assert '14' in dataset.records[3].home_team_player_positions
#
# class TestMetricaEvent:
diff --git a/kloppy/tests/test_tracab.py b/kloppy/tests/test_tracab.py
index d7223460..c7a233b8 100644
--- a/kloppy/tests/test_tracab.py
+++ b/kloppy/tests/test_tracab.py
@@ -13,7 +13,7 @@ def test_correct_deserialization(self):
with open(f'{base_dir}/files/tracab_meta.xml', 'rb') as meta_data, \
open(f'{base_dir}/files/tracab_raw.dat', 'rb') as raw_data:
- data_set = serializer.deserialize(
+ dataset = serializer.deserialize(
inputs={
'meta_data': meta_data,
'raw_data': raw_data
@@ -23,25 +23,25 @@ def test_correct_deserialization(self):
}
)
- assert len(data_set.records) == 6
- assert len(data_set.periods) == 2
- assert data_set.orientation == Orientation.FIXED_HOME_AWAY
- assert data_set.periods[0] == Period(id=1, start_timestamp=4.0, end_timestamp=4.08,
+ assert len(dataset.records) == 6
+ assert len(dataset.periods) == 2
+ assert dataset.orientation == Orientation.FIXED_HOME_AWAY
+ assert dataset.periods[0] == Period(id=1, start_timestamp=4.0, end_timestamp=4.08,
attacking_direction=AttackingDirection.HOME_AWAY)
- assert data_set.periods[1] == Period(id=2, start_timestamp=8.0, end_timestamp=8.08,
+ assert dataset.periods[1] == Period(id=2, start_timestamp=8.0, end_timestamp=8.08,
attacking_direction=AttackingDirection.AWAY_HOME)
- assert data_set.records[0].home_team_player_positions['19'] == Point(x=-1234.0, y=-294.0)
- assert data_set.records[0].away_team_player_positions['19'] == Point(x=8889, y=-666)
- assert data_set.records[0].ball_position == Point(x=-27, y=25)
- assert data_set.records[0].ball_state == BallState.ALIVE
- assert data_set.records[0].ball_owning_team == Team.HOME
+ assert dataset.records[0].home_team_player_positions['19'] == Point(x=-1234.0, y=-294.0)
+ assert dataset.records[0].away_team_player_positions['19'] == Point(x=8889, y=-666)
+ assert dataset.records[0].ball_position == Point(x=-27, y=25)
+ assert dataset.records[0].ball_state == BallState.ALIVE
+ assert dataset.records[0].ball_owning_team == Team.HOME
- assert data_set.records[1].ball_owning_team == Team.AWAY
+ assert dataset.records[1].ball_owning_team == Team.AWAY
- assert data_set.records[2].ball_state == BallState.DEAD
+ assert dataset.records[2].ball_state == BallState.DEAD
# make sure player data is only in the frame when the player is at the pitch
- assert '1337' not in data_set.records[0].away_team_player_positions
- assert '1337' in data_set.records[3].away_team_player_positions
+ assert '1337' not in dataset.records[0].away_team_player_positions
+ assert '1337' in dataset.records[3].away_team_player_positions