diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..2483976d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ +__pycache__/ diff --git a/README.md b/README.md index b1a9f043..36850173 100644 --- a/README.md +++ b/README.md @@ -47,14 +47,14 @@ from kloppy import ( ) # metrica data -data_set = load_metrica_tracking_data('home_file.csv', 'away_file.csv') +dataset = load_metrica_tracking_data('home_file.csv', 'away_file.csv') # or tracab -data_set = load_tracab_tracking_data('meta.xml', 'raw_data.txt') +dataset = load_tracab_tracking_data('meta.xml', 'raw_data.txt') # or epts -data_set = load_epts_tracking_data('meta.xml', 'raw_data.txt') +dataset = load_epts_tracking_data('meta.xml', 'raw_data.txt') -data_set = transform(data_set, pitch_dimensions=[[0, 108], [-34, 34]]) -pandas_data_frame = to_pandas(data_set) +dataset = transform(dataset, pitch_dimensions=[[0, 108], [-34, 34]]) +pandas_data_frame = to_pandas(dataset) ``` ### Public datasets / Very quick start @@ -63,7 +63,7 @@ we added a "dataset loader" which does all the heavy lifting for you: find urls, ```python from kloppy import datasets -data_set = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10}) +dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10}) ``` ### Standardized models @@ -84,7 +84,7 @@ serializer = TRACABSerializer() with open("tracab_data.dat", "rb") as raw, \ open("tracab_metadata.xml", "rb") as meta: - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs={ 'raw_data': raw, 'meta_data': meta @@ -94,7 +94,7 @@ with open("tracab_data.dat", "rb") as raw, \ } ) - # start working with data_set + # start working with dataset ``` or Metrica data @@ -106,7 +106,7 @@ serializer = MetricaTrackingSerializer() with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away, \ open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home: - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs={ 'raw_data_home': raw_home, 'raw_data_away': raw_away @@ -116,7 +116,7 @@ with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away, \ } ) - # start working with data_set + # start working with dataset ``` @@ -129,7 +129,7 @@ serializer = EPTSSerializer() with open("raw_data.txt", "rb") as raw, \ open("metadata.xml", "rb") as meta: - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs={ 'raw_data': raw, 'meta_data': meta @@ -139,7 +139,7 @@ with open("raw_data.txt", "rb") as raw, \ } ) - # start working with data_set + # start working with dataset ``` @@ -148,9 +148,9 @@ Data providers use their own pitch dimensions. Some use actual meters while othe ```python from kloppy.domain import Transformer, PitchDimensions, Dimension -# use deserialized `data_set` -new_data_set = Transformer.transform_data_set( - data_set, +# use deserialized `dataset` +new_dataset = Transformer.transform_dataset( + dataset, to_pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 100), y_dim=Dimension(0, 100) @@ -166,8 +166,8 @@ Data providers can use different orientations. Some use a fixed orientation and ```python from kloppy.domain import Transformer, Orientation -new_data_set = Transformer.transform_data_set( - data_set, +new_dataset = Transformer.transform_dataset( + dataset, to_orientation=Orientation.BALL_OWNING_TEAM ) ``` @@ -176,9 +176,9 @@ new_data_set = Transformer.transform_data_set( ```python from kloppy.domain import Transformer, PitchDimensions, Dimension, Orientation -# use deserialized `data_set` -new_data_set = Transformer.transform_data_set( - data_set, +# use deserialized `dataset` +new_dataset = Transformer.transform_dataset( + dataset, to_pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 100), y_dim=Dimension(0, 100) diff --git a/examples/datasets/metrica.py b/examples/datasets/metrica.py index d0322a6b..3564119b 100644 --- a/examples/datasets/metrica.py +++ b/examples/datasets/metrica.py @@ -8,13 +8,13 @@ def main(): """ # The metrica dataset loader loads by default the 'game1' dataset - data_set = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10}) - print(len(data_set.frames)) + dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./12, 'limit': 10}) + print(len(dataset.frames)) # We can pass additional keyword arguments to the loaders to specify a different dataset - data_set = datasets.load("metrica_tracking", options={'limit': 1000}, game='game2') + dataset = datasets.load("metrica_tracking", options={'limit': 1000}, game='game2') - data_frame = to_pandas(data_set) + data_frame = to_pandas(dataset) print(data_frame) diff --git a/examples/playing_time.py b/examples/playing_time.py index 7b352c83..746e6034 100644 --- a/examples/playing_time.py +++ b/examples/playing_time.py @@ -9,10 +9,10 @@ def main(): This example shows how to determine playing time """ - data_set = datasets.load("metrica_tracking", options={'sample_rate': 1./25}) + dataset = datasets.load("metrica_tracking", options={'sample_rate': 1./25}) playing_seconds_per_player = Counter() - for frame in data_set.frames: + for frame in dataset.frames: playing_seconds_per_player.update([int(jersey_no) for jersey_no in frame.home_team_player_positions.keys()]) x = range(len(playing_seconds_per_player)) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 2f345dcc..03a9bf53 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -93,7 +93,7 @@ def set_attacking_direction(self, attacking_direction: AttackingDirection): self.attacking_direction = attacking_direction -class DataSetFlag(Flag): +class DatasetFlag(Flag): BALL_OWNING_TEAM = 1 BALL_STATE = 2 @@ -108,8 +108,8 @@ class DataRecord(ABC): @dataclass -class DataSet(ABC): - flags: DataSetFlag +class Dataset(ABC): + flags: DatasetFlag pitch_dimensions: PitchDimensions orientation: Orientation periods: List[Period] diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 02035e70..b5cf33ae 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -6,7 +6,7 @@ from typing import List, Union from .pitch import Point -from .common import DataRecord, DataSet, Team +from .common import DataRecord, Dataset, Team class SubType(Enum): @@ -226,7 +226,7 @@ def event_type(self) -> EventType: @dataclass -class EventDataSet(DataSet): +class EventDataset(Dataset): records: List[Union[ SetPieceEvent, ShotEvent ]] diff --git a/kloppy/domain/models/tracking.py b/kloppy/domain/models/tracking.py index 59bf5280..01d35ed1 100644 --- a/kloppy/domain/models/tracking.py +++ b/kloppy/domain/models/tracking.py @@ -2,7 +2,7 @@ from typing import List, Dict from .common import ( - DataSet, + Dataset, DataRecord ) from .pitch import Point @@ -17,7 +17,7 @@ class Frame(DataRecord): @dataclass -class TrackingDataSet(DataSet): +class TrackingDataset(Dataset): frame_rate: int records: List[Frame] diff --git a/kloppy/domain/services/enrichers/__init__.py b/kloppy/domain/services/enrichers/__init__.py index 8d6e9919..594ca0d3 100644 --- a/kloppy/domain/services/enrichers/__init__.py +++ b/kloppy/domain/services/enrichers/__init__.py @@ -1,12 +1,12 @@ from dataclasses import dataclass -from ...models.tracking import DataSet as TrackingDataSet -from ...models.event import DataSet as EventDataSet -from ...models.common import DataSetFlag, Team, BallState +from ...models.tracking import Dataset as TrackingDataset +from ...models.event import Dataset as EventDataset +from ...models.common import DatasetFlag, Team, BallState class TrackingPossessionEnricher: - def enrich_inplace(self, tracking_data_set: TrackingDataSet, event_data_set: EventDataSet) -> None: + def enrich_inplace(self, tracking_dataset: TrackingDataset, event_dataset: EventDataset) -> None: """ Return an enriched tracking data set. @@ -16,20 +16,20 @@ def enrich_inplace(self, tracking_data_set: TrackingDataSet, event_data_set: Eve they happen. """ - if tracking_data_set.flags & (DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE): + if tracking_dataset.flags & (DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE): return - if not event_data_set.flags & (DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE): - raise Exception("Event DataSet does not contain ball owning team or ball state information") + if not event_dataset.flags & (DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE): + raise Exception("Event Dataset does not contain ball owning team or ball state information") # set some defaults next_event_idx = 0 current_ball_owning_team = None current_ball_state = None - for frame in tracking_data_set.records: - if next_event_idx < len(event_data_set.records): - event = event_data_set.records[next_event_idx] + for frame in tracking_dataset.records: + if next_event_idx < len(event_dataset.records): + event = event_dataset.records[next_event_idx] if frame.period.id == event.period.id and frame.timestamp >= event.timestamp: current_ball_owning_team = event.ball_owning_team current_ball_state = event.ball_state diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py index a7b79b24..9fb772f5 100644 --- a/kloppy/domain/services/transformers/__init__.py +++ b/kloppy/domain/services/transformers/__init__.py @@ -7,7 +7,7 @@ Frame, Team, AttackingDirection, - TrackingDataSet, DataSetFlag, DataSet, # NOT YET: EventDataSet + TrackingDataset, DatasetFlag, Dataset, # NOT YET: EventDataset ) @@ -78,43 +78,43 @@ def transform_frame(self, frame: Frame) -> Frame: } ) - DataSetType = TypeVar('DataSetType') + DatasetType = TypeVar('DatasetType') @classmethod - def transform_data_set(cls, - data_set: DataSetType, + def transform_dataset(cls, + dataset: DatasetType, to_pitch_dimensions: PitchDimensions = None, - to_orientation: Orientation = None) -> DataSetType: + to_orientation: Orientation = None) -> DatasetType: if not to_pitch_dimensions and not to_orientation: - return data_set + return dataset elif not to_orientation: - to_orientation = data_set.orientation + to_orientation = dataset.orientation elif not to_pitch_dimensions: - to_pitch_dimensions = data_set.pitch_dimensions + to_pitch_dimensions = dataset.pitch_dimensions if to_orientation == Orientation.BALL_OWNING_TEAM: - if not data_set.flags & DataSetFlag.BALL_OWNING_TEAM: + if not dataset.flags & DatasetFlag.BALL_OWNING_TEAM: raise ValueError("Cannot transform to BALL_OWNING_TEAM orientation when dataset doesn't contain " "ball owning team data") transformer = cls( - from_pitch_dimensions=data_set.pitch_dimensions, - from_orientation=data_set.orientation, + from_pitch_dimensions=dataset.pitch_dimensions, + from_orientation=dataset.orientation, to_pitch_dimensions=to_pitch_dimensions, to_orientation=to_orientation ) - if isinstance(data_set, TrackingDataSet): - frames = list(map(transformer.transform_frame, data_set.records)) + if isinstance(dataset, TrackingDataset): + frames = list(map(transformer.transform_frame, dataset.records)) - return TrackingDataSet( - flags=data_set.flags, - frame_rate=data_set.frame_rate, - periods=data_set.periods, + return TrackingDataset( + flags=dataset.flags, + frame_rate=dataset.frame_rate, + periods=dataset.periods, pitch_dimensions=to_pitch_dimensions, orientation=to_orientation, records=frames ) - #elif isinstance(data_set, EventDataSet): - # raise Exception("EventDataSet transformer not implemented yet") + #elif isinstance(dataset, EventDataset): + # raise Exception("EventDataset transformer not implemented yet") else: - raise Exception("Unknown DataSet type") + raise Exception("Unknown Dataset type") diff --git a/kloppy/helpers.py b/kloppy/helpers.py index a48176f6..eb65611d 100644 --- a/kloppy/helpers.py +++ b/kloppy/helpers.py @@ -1,10 +1,10 @@ from typing import Callable, TypeVar from . import TRACABSerializer, MetricaTrackingSerializer, EPTSSerializer -from .domain import DataSet, Frame, TrackingDataSet, Transformer, Orientation, PitchDimensions, Dimension +from .domain import Dataset, Frame, TrackingDataset, Transformer, Orientation, PitchDimensions, Dimension -def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> DataSet: +def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> Dataset: serializer = TRACABSerializer() with open(meta_data_filename, "rb") as meta_data, \ open(raw_data_filename, "rb") as raw_data: @@ -18,7 +18,7 @@ def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, o ) -def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filename: str, options: dict = None) -> DataSet: +def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filename: str, options: dict = None) -> Dataset: serializer = MetricaTrackingSerializer() with open(raw_data_home_filename, "rb") as raw_data_home, \ open(raw_data_away_filename, "rb") as raw_data_away: @@ -32,7 +32,7 @@ def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filena ) -def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> DataSet: +def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> Dataset: serializer = EPTSSerializer() with open(meta_data_filename, "rb") as meta_data, \ open(raw_data_filename, "rb") as raw_data: @@ -46,10 +46,10 @@ def load_epts_tracking_data(meta_data_filename: str, raw_data_filename: str, opt ) -DataSetType = TypeVar('DataSetType') +DatasetType = TypeVar('DatasetType') -def transform(data_set: DataSetType, to_orientation=None, to_pitch_dimensions=None) -> DataSetType: +def transform(dataset: DatasetType, to_orientation=None, to_pitch_dimensions=None) -> DatasetType: if to_orientation and isinstance(to_orientation, str): to_orientation = Orientation[to_orientation] if to_pitch_dimensions and (isinstance(to_pitch_dimensions, list) or isinstance(to_pitch_dimensions, tuple)): @@ -57,8 +57,8 @@ def transform(data_set: DataSetType, to_orientation=None, to_pitch_dimensions=No x_dim=Dimension(*to_pitch_dimensions[0]), y_dim=Dimension(*to_pitch_dimensions[1]) ) - return Transformer.transform_data_set( - data_set=data_set, + return Transformer.transform_dataset( + dataset=dataset, to_orientation=to_orientation, to_pitch_dimensions=to_pitch_dimensions ) @@ -83,11 +83,11 @@ def _frame_to_pandas_row_converter(frame: Frame) -> dict: f'player_away_{jersey_no}_x': position.x, f'player_away_{jersey_no}_y': position.y }) - + return row -def to_pandas(data_set: DataSet, _record_converter: Callable = None) -> 'DataFrame': +def to_pandas(dataset: Dataset, _record_converter: Callable = None) -> 'DataFrame': try: import pandas as pd except ImportError: @@ -95,13 +95,13 @@ def to_pandas(data_set: DataSet, _record_converter: Callable = None) -> 'DataFra " install it using: pip install pandas") if not _record_converter: - if isinstance(data_set, TrackingDataSet): + if isinstance(dataset, TrackingDataset): _record_converter = _frame_to_pandas_row_converter else: raise Exception("Unknown dataset type") return pd.DataFrame.from_records( - map(_record_converter, data_set.records) + map(_record_converter, dataset.records) ) diff --git a/kloppy/infra/datasets/core/builder.py b/kloppy/infra/datasets/core/builder.py index 84e074ab..93866f3b 100644 --- a/kloppy/infra/datasets/core/builder.py +++ b/kloppy/infra/datasets/core/builder.py @@ -7,7 +7,7 @@ class DatasetBuilder(metaclass=RegisteredDataset): @abstractmethod - def get_data_set_files(self, **kwargs) -> Dict[str, Dict[str, str]]: + def get_dataset_urls(self, **kwargs) -> Dict[str, Dict[str, str]]: raise NotImplementedError @abstractmethod diff --git a/kloppy/infra/datasets/core/loading.py b/kloppy/infra/datasets/core/loading.py index c0005145..260218d9 100644 --- a/kloppy/infra/datasets/core/loading.py +++ b/kloppy/infra/datasets/core/loading.py @@ -4,7 +4,7 @@ from typing import Dict, Union -from kloppy.domain import DataSet, TrackingDataSet +from kloppy.domain import TrackingDataset from .registered import _DATASET_REGISTRY @@ -17,12 +17,12 @@ def download_file(url, local_filename): f.write(chunk) -def get_local_files(data_set_name: str, files: Dict[str, str]) -> Dict[str, str]: +def get_local_files(dataset_name: str, files: Dict[str, str]) -> Dict[str, str]: datasets_base_dir = os.environ.get('KLOPPY_BASE_DIR', None) if not datasets_base_dir: datasets_base_dir = os.path.expanduser('~/kloppy_datasets') - dataset_base_dir = f'{datasets_base_dir}/{data_set_name}' + dataset_base_dir = f'{datasets_base_dir}/{dataset_name}' if not os.path.exists(dataset_base_dir): os.makedirs(dataset_base_dir) @@ -38,15 +38,15 @@ def get_local_files(data_set_name: str, files: Dict[str, str]) -> Dict[str, str] return local_files -def load(data_set_name: str, options=None, **dataset_kwargs) -> Union[TrackingDataSet]: - if data_set_name not in _DATASET_REGISTRY: - raise ValueError(f"Dataset {data_set_name} not found") +def load(dataset_name: str, options=None, **dataset_kwargs) -> Union[TrackingDataset]: + if dataset_name not in _DATASET_REGISTRY: + raise ValueError(f"Dataset {dataset_name} not found") - builder_cls = _DATASET_REGISTRY[data_set_name] + builder_cls = _DATASET_REGISTRY[dataset_name] builder = builder_cls() - dataset_remote_files = builder.get_data_set_files(**dataset_kwargs) - dataset_local_files = get_local_files(data_set_name, dataset_remote_files) + dataset_urls = builder.get_dataset_urls(**dataset_kwargs) + dataset_local_files = get_local_files(dataset_name, dataset_urls) file_handlers = { local_file_key: open(local_file_name, 'rb') @@ -57,11 +57,11 @@ def load(data_set_name: str, options=None, **dataset_kwargs) -> Union[TrackingDa try: serializer_cls = builder.get_serializer_cls() serializer = serializer_cls() - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs=file_handlers, options=options ) finally: for fp in file_handlers.values(): fp.close() - return data_set + return dataset diff --git a/kloppy/infra/datasets/tracking/metrica.py b/kloppy/infra/datasets/tracking/metrica.py index a296bb2c..944c2fb8 100644 --- a/kloppy/infra/datasets/tracking/metrica.py +++ b/kloppy/infra/datasets/tracking/metrica.py @@ -17,7 +17,7 @@ class MetricaTracking(DatasetBuilder): - def get_data_set_files(self,**kwargs) -> Dict[str, str]: + def get_dataset_urls(self, **kwargs) -> Dict[str, str]: game = kwargs.get('game', 'game1') return _DATASET_URLS[game] diff --git a/kloppy/infra/serializers/event/base.py b/kloppy/infra/serializers/event/base.py index 16d4af69..271f8183 100644 --- a/kloppy/infra/serializers/event/base.py +++ b/kloppy/infra/serializers/event/base.py @@ -2,14 +2,14 @@ from typing import Tuple, Dict from kloppy.infra.utils import Readable -from kloppy.domain import EventDataSet +from kloppy.domain import EventDataset class EventDataSerializer(ABC): @abstractmethod - def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataSet: + def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset: raise NotImplementedError @abstractmethod - def serialize(self, data_set: EventDataSet) -> Tuple[str, str]: + def serialize(self, dataset: EventDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/event/metrica/serializer.py b/kloppy/infra/serializers/event/metrica/serializer.py index 252a8b58..b0025054 100644 --- a/kloppy/infra/serializers/event/metrica/serializer.py +++ b/kloppy/infra/serializers/event/metrica/serializer.py @@ -3,8 +3,8 @@ import csv from kloppy.domain import ( - EventDataSet, Team, Point, Period, Orientation, - DataSetFlag, PitchDimensions, Dimension, + EventDataset, Team, Point, Period, Orientation, + DatasetFlag, PitchDimensions, Dimension, AttackingDirection, BallState ) from kloppy.domain.models.event import ( @@ -80,7 +80,7 @@ def __reduce_game_state(self, event: Event, game_state: __GameState) -> __GameSt return new_state if new_state else game_state - def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataSet: + def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset: self.__validate_inputs(inputs) periods = [] @@ -240,8 +240,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Even Orientation.FIXED_AWAY_HOME ) - return EventDataSet( - flags=DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM, + return EventDataset( + flags=DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM, orientation=orientation, pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 1), @@ -251,5 +251,5 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Even records=events ) - def serialize(self, data_set: EventDataSet) -> Tuple[str, str]: + def serialize(self, dataset: EventDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/tracking/base.py b/kloppy/infra/serializers/tracking/base.py index 237382ed..e3b942ce 100644 --- a/kloppy/infra/serializers/tracking/base.py +++ b/kloppy/infra/serializers/tracking/base.py @@ -2,14 +2,14 @@ from typing import Tuple, Dict from kloppy.infra.utils import Readable -from kloppy.domain import DataSet +from kloppy.domain import Dataset class TrackingDataSerializer(ABC): @abstractmethod - def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> DataSet: + def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Dataset: raise NotImplementedError @abstractmethod - def serialize(self, data_set: DataSet) -> Tuple[str, str]: + def serialize(self, dataset: Dataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/tracking/epts/serializer.py b/kloppy/infra/serializers/tracking/epts/serializer.py index 5e013801..8b751119 100644 --- a/kloppy/infra/serializers/tracking/epts/serializer.py +++ b/kloppy/infra/serializers/tracking/epts/serializer.py @@ -1,7 +1,7 @@ from typing import Tuple, Dict from kloppy.domain import ( - TrackingDataSet, DataSetFlag, + TrackingDataset, DatasetFlag, AttackingDirection, Frame, Point, @@ -61,9 +61,9 @@ def _frame_from_row(row: dict, meta_data: EPTSMetaData) -> Frame: ball_position=Point(x=row['ball_x'], y=row['ball_y']) ) - def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet: + def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset: """ - Deserialize EPTS tracking data into a `TrackingDataSet`. + Deserialize EPTS tracking data into a `TrackingDataset`. Parameters ---------- @@ -78,7 +78,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac frames that will be returned. Returns ------- - data_set : TrackingDataSet + dataset : TrackingDataset Raises ------ - @@ -91,7 +91,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac >>> serializer = EPTSSerializer() >>> with open("metadata.xml", "rb") as meta, \ >>> open("raw.dat", "rb") as raw: - >>> data_set = serializer.deserialize( + >>> dataset = serializer.deserialize( >>> inputs={ >>> 'meta_data': meta, >>> 'raw_data': raw @@ -141,8 +141,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac Orientation.FIXED_AWAY_HOME ) if start_attacking_direction else None - return TrackingDataSet( - flags=~(DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM), + return TrackingDataset( + flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM), frame_rate=meta_data.frame_rate, orientation=orientation, pitch_dimensions=meta_data.pitch_dimensions, @@ -150,6 +150,6 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac records=frames ) - def serialize(self, data_set: TrackingDataSet) -> Tuple[str, str]: + def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/tracking/metrica.py b/kloppy/infra/serializers/tracking/metrica.py index 533a8dcd..4dcd4350 100644 --- a/kloppy/infra/serializers/tracking/metrica.py +++ b/kloppy/infra/serializers/tracking/metrica.py @@ -2,7 +2,7 @@ from typing import Tuple, Dict, Iterator from kloppy.domain import (attacking_direction_from_frame, - TrackingDataSet, + TrackingDataset, AttackingDirection, Frame, Point, @@ -10,7 +10,7 @@ Orientation, PitchDimensions, Dimension, - DataSetFlag) + DatasetFlag) from kloppy.infra.utils import Readable, performance_logging from . import TrackingDataSerializer @@ -97,9 +97,9 @@ def __validate_partials(home_partial_frame: __PartialFrame, away_partial_frame: if away_partial_frame.team != 'Away': raise ValueError("raw_data_away contains home team data") - def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet: + def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset: """ - Deserialize Metrica tracking data into a `TrackingDataSet`. + Deserialize Metrica tracking data into a `TrackingDataset`. Parameters ---------- @@ -114,7 +114,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac frames that will be returned. Returns ------- - data_set : TrackingDataSet + dataset : TrackingDataset Raises ------ ValueError when both input files don't seem to belong to each other @@ -128,7 +128,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac >>> with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_home, \ >>> open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_away: >>> - >>> data_set = serializer.deserialize( + >>> dataset = serializer.deserialize( >>> inputs={ >>> 'raw_data_home': raw_home, >>> 'raw_data_away': raw_away @@ -198,8 +198,8 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac Orientation.FIXED_AWAY_HOME ) - return TrackingDataSet( - flags=~(DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM), + return TrackingDataset( + flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM), frame_rate=frame_rate, orientation=orientation, pitch_dimensions=PitchDimensions( @@ -210,5 +210,5 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac records=frames ) - def serialize(self, data_set: TrackingDataSet) -> Tuple[str, str]: + def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/infra/serializers/tracking/tracab.py b/kloppy/infra/serializers/tracking/tracab.py index b33872bd..1fc94b97 100644 --- a/kloppy/infra/serializers/tracking/tracab.py +++ b/kloppy/infra/serializers/tracking/tracab.py @@ -3,7 +3,7 @@ from lxml import objectify from kloppy.domain import ( - TrackingDataSet, DataSetFlag, + TrackingDataset, DatasetFlag, AttackingDirection, Frame, Point, @@ -74,9 +74,9 @@ def __validate_inputs(inputs: Dict[str, Readable]): if "raw_data" not in inputs: raise ValueError("Please specify a value for 'raw_data'") - def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet: + def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset: """ - Deserialize TRACAB tracking data into a `TrackingDataSet`. + Deserialize TRACAB tracking data into a `TrackingDataset`. Parameters ---------- @@ -92,7 +92,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac frames that will be returned. Returns ------- - data_set : TrackingDataSet + dataset : TrackingDataset Raises ------ - @@ -105,7 +105,7 @@ def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> Trac >>> serializer = TRACABSerializer() >>> with open("metadata.xml", "rb") as meta, \ >>> open("raw.dat", "rb") as raw: - >>> data_set = serializer.deserialize( + >>> dataset = serializer.deserialize( >>> inputs={ >>> 'meta_data': meta, >>> 'raw_data': raw @@ -188,8 +188,8 @@ def _iter(): Orientation.FIXED_AWAY_HOME ) - return TrackingDataSet( - flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE, + return TrackingDataset( + flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, frame_rate=frame_rate, orientation=orientation, pitch_dimensions=PitchDimensions( @@ -202,6 +202,6 @@ def _iter(): records=frames ) - def serialize(self, data_set: TrackingDataSet) -> Tuple[str, str]: + def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: raise NotImplementedError diff --git a/kloppy/tests/test_enricher.py b/kloppy/tests/test_enricher.py index 36d797ab..c95641ee 100644 --- a/kloppy/tests/test_enricher.py +++ b/kloppy/tests/test_enricher.py @@ -1,4 +1,4 @@ -# from kloppy.domain import TrackingDataSet, EventDataSet, PitchDimensions, Dimension, Orientation, DataSetFlag, Period, \ +# from kloppy.domain import TrackingDataset, EventDataset, PitchDimensions, Dimension, Orientation, DatasetFlag, Period, \ # Frame, TrackingPossessionEnricher, SetPieceEvent, BallState, Team # # @@ -8,8 +8,8 @@ # Period(id=1, start_timestamp=0.0, end_timestamp=10.0), # Period(id=2, start_timestamp=15.0, end_timestamp=25.0) # ] -# tracking_data = TrackingDataSet( -# flags=~(DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE), +# tracking_data = TrackingDataset( +# flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE), # pitch_dimensions=PitchDimensions( # x_dim=Dimension(0, 100), # y_dim=Dimension(-50, 50) @@ -32,8 +32,8 @@ # periods=periods # ) # -# event_data = EventDataSet( -# flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE, +# event_data = EventDataset( +# flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, # pitch_dimensions=PitchDimensions( # x_dim=Dimension(0, 100), # y_dim=Dimension(-50, 50) @@ -54,6 +54,6 @@ # # enricher = TrackingPossessionEnricher() # enricher.enrich_inplace( -# tracking_data_set=tracking_data, -# event_data_set=event_data +# tracking_dataset=tracking_data, +# event_dataset=event_data # ) \ No newline at end of file diff --git a/kloppy/tests/test_epts.py b/kloppy/tests/test_epts.py index 9b688f43..5b86515d 100644 --- a/kloppy/tests/test_epts.py +++ b/kloppy/tests/test_epts.py @@ -74,17 +74,17 @@ def test_correct_deserialization(self): with open(f'{base_dir}/files/epts_meta.xml', 'rb') as meta_data, \ open(f'{base_dir}/files/epts_raw.txt', 'rb') as raw_data: - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs={ 'meta_data': meta_data, 'raw_data': raw_data } ) - assert len(data_set.records) == 2 - assert len(data_set.periods) == 0 - assert data_set.orientation == Orientation.FIXED_HOME_AWAY + assert len(dataset.records) == 2 + assert len(dataset.periods) == 0 + assert dataset.orientation == Orientation.FIXED_HOME_AWAY - assert data_set.records[0].home_team_player_positions['22'] == Point(x=-769, y=-2013) - assert data_set.records[0].away_team_player_positions == {} - assert data_set.records[0].ball_position == Point(x=-2656, y=367) + assert dataset.records[0].home_team_player_positions['22'] == Point(x=-769, y=-2013) + assert dataset.records[0].away_team_player_positions == {} + assert dataset.records[0].ball_position == Point(x=-2656, y=367) diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index 42c18666..41d10dab 100644 --- a/kloppy/tests/test_helpers.py +++ b/kloppy/tests/test_helpers.py @@ -5,8 +5,8 @@ from kloppy import to_pandas, load_metrica_tracking_data, load_tracab_tracking_data, transform from kloppy.domain import ( - Period, DataSetFlag, Point, AttackingDirection, - TrackingDataSet, PitchDimensions, Dimension, + Period, DatasetFlag, Point, AttackingDirection, + TrackingDataset, PitchDimensions, Dimension, Orientation, Frame ) @@ -14,29 +14,29 @@ class TestHelpers: def test_load_metrica_tracking_data(self): base_dir = os.path.dirname(__file__) - data_set = load_metrica_tracking_data( + dataset = load_metrica_tracking_data( f'{base_dir}/files/metrica_home.csv', f'{base_dir}/files/metrica_away.csv' ) - assert len(data_set.records) == 6 - assert len(data_set.periods) == 2 + assert len(dataset.records) == 6 + assert len(dataset.periods) == 2 def test_load_tracab_tracking_data(self): base_dir = os.path.dirname(__file__) - data_set = load_tracab_tracking_data( + dataset = load_tracab_tracking_data( f'{base_dir}/files/tracab_meta.xml', f'{base_dir}/files/tracab_raw.dat' ) - assert len(data_set.records) == 5 # only alive=True - assert len(data_set.periods) == 2 + assert len(dataset.records) == 5 # only alive=True + assert len(dataset.periods) == 2 def _get_dataset(self): periods = [ Period(id=1, start_timestamp=0.0, end_timestamp=10.0, attacking_direction=AttackingDirection.HOME_AWAY), Period(id=2, start_timestamp=15.0, end_timestamp=25.0, attacking_direction=AttackingDirection.AWAY_HOME) ] - tracking_data = TrackingDataSet( - flags=~(DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE), + tracking_data = TrackingDataset( + flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE), pitch_dimensions=PitchDimensions( x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50) @@ -75,14 +75,14 @@ def test_transform(self): tracking_data = self._get_dataset() # orientation change AND dimension scale - transformed_data_set = transform( + transformed_dataset = transform( tracking_data, to_orientation="AWAY_TEAM", to_pitch_dimensions=[[0, 1], [0, 1]] ) - assert transformed_data_set.frames[0].ball_position == Point(x=0, y=1) - assert transformed_data_set.frames[1].ball_position == Point(x=1, y=0) + assert transformed_dataset.frames[0].ball_position == Point(x=0, y=1) + assert transformed_dataset.frames[1].ball_position == Point(x=1, y=0) def test_to_pandas(self): tracking_data = self._get_dataset() diff --git a/kloppy/tests/test_metrica.py b/kloppy/tests/test_metrica.py index 4dd1fb1f..c2a3aaac 100644 --- a/kloppy/tests/test_metrica.py +++ b/kloppy/tests/test_metrica.py @@ -12,29 +12,29 @@ def test_correct_deserialization(self): with open(f'{base_dir}/files/metrica_home.csv', 'rb') as raw_data_home, \ open(f'{base_dir}/files/metrica_away.csv', 'rb') as raw_data_away: - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs={ 'raw_data_home': raw_data_home, 'raw_data_away': raw_data_away } ) - assert len(data_set.records) == 6 - assert len(data_set.periods) == 2 - assert data_set.orientation == Orientation.FIXED_HOME_AWAY - assert data_set.periods[0] == Period(id=1, start_timestamp=0.04, end_timestamp=0.12, + assert len(dataset.records) == 6 + assert len(dataset.periods) == 2 + assert dataset.orientation == Orientation.FIXED_HOME_AWAY + assert dataset.periods[0] == Period(id=1, start_timestamp=0.04, end_timestamp=0.12, attacking_direction=AttackingDirection.HOME_AWAY) - assert data_set.periods[1] == Period(id=2, start_timestamp=5800.16, end_timestamp=5800.24, + assert dataset.periods[1] == Period(id=2, start_timestamp=5800.16, end_timestamp=5800.24, attacking_direction=AttackingDirection.AWAY_HOME) # make sure data is loaded correctly (including flip y-axis) - assert data_set.records[0].home_team_player_positions['11'] == Point(x=0.00082, y=1 - 0.48238) - assert data_set.records[0].away_team_player_positions['25'] == Point(x=0.90509, y=1 - 0.47462) - assert data_set.records[0].ball_position == Point(x=0.45472, y=1 - 0.38709) + assert dataset.records[0].home_team_player_positions['11'] == Point(x=0.00082, y=1 - 0.48238) + assert dataset.records[0].away_team_player_positions['25'] == Point(x=0.90509, y=1 - 0.47462) + assert dataset.records[0].ball_position == Point(x=0.45472, y=1 - 0.38709) # make sure player data is only in the frame when the player is at the pitch - assert '14' not in data_set.records[0].home_team_player_positions - assert '14' in data_set.records[3].home_team_player_positions + assert '14' not in dataset.records[0].home_team_player_positions + assert '14' in dataset.records[3].home_team_player_positions # # class TestMetricaEvent: diff --git a/kloppy/tests/test_tracab.py b/kloppy/tests/test_tracab.py index d7223460..c7a233b8 100644 --- a/kloppy/tests/test_tracab.py +++ b/kloppy/tests/test_tracab.py @@ -13,7 +13,7 @@ def test_correct_deserialization(self): with open(f'{base_dir}/files/tracab_meta.xml', 'rb') as meta_data, \ open(f'{base_dir}/files/tracab_raw.dat', 'rb') as raw_data: - data_set = serializer.deserialize( + dataset = serializer.deserialize( inputs={ 'meta_data': meta_data, 'raw_data': raw_data @@ -23,25 +23,25 @@ def test_correct_deserialization(self): } ) - assert len(data_set.records) == 6 - assert len(data_set.periods) == 2 - assert data_set.orientation == Orientation.FIXED_HOME_AWAY - assert data_set.periods[0] == Period(id=1, start_timestamp=4.0, end_timestamp=4.08, + assert len(dataset.records) == 6 + assert len(dataset.periods) == 2 + assert dataset.orientation == Orientation.FIXED_HOME_AWAY + assert dataset.periods[0] == Period(id=1, start_timestamp=4.0, end_timestamp=4.08, attacking_direction=AttackingDirection.HOME_AWAY) - assert data_set.periods[1] == Period(id=2, start_timestamp=8.0, end_timestamp=8.08, + assert dataset.periods[1] == Period(id=2, start_timestamp=8.0, end_timestamp=8.08, attacking_direction=AttackingDirection.AWAY_HOME) - assert data_set.records[0].home_team_player_positions['19'] == Point(x=-1234.0, y=-294.0) - assert data_set.records[0].away_team_player_positions['19'] == Point(x=8889, y=-666) - assert data_set.records[0].ball_position == Point(x=-27, y=25) - assert data_set.records[0].ball_state == BallState.ALIVE - assert data_set.records[0].ball_owning_team == Team.HOME + assert dataset.records[0].home_team_player_positions['19'] == Point(x=-1234.0, y=-294.0) + assert dataset.records[0].away_team_player_positions['19'] == Point(x=8889, y=-666) + assert dataset.records[0].ball_position == Point(x=-27, y=25) + assert dataset.records[0].ball_state == BallState.ALIVE + assert dataset.records[0].ball_owning_team == Team.HOME - assert data_set.records[1].ball_owning_team == Team.AWAY + assert dataset.records[1].ball_owning_team == Team.AWAY - assert data_set.records[2].ball_state == BallState.DEAD + assert dataset.records[2].ball_state == BallState.DEAD # make sure player data is only in the frame when the player is at the pitch - assert '1337' not in data_set.records[0].away_team_player_positions - assert '1337' in data_set.records[3].away_team_player_positions + assert '1337' not in dataset.records[0].away_team_player_positions + assert '1337' in dataset.records[3].away_team_player_positions