From 97fc6f813c331462035d221e1959a1e09acc561a Mon Sep 17 00:00:00 2001
From: Pieter Robberechts <pieter.robberechts@kuleuven.be>
Date: Tue, 4 Jun 2024 15:32:20 +0200
Subject: [PATCH] dev

---
 soccer_xg/__init__.py         |  24 +-
 soccer_xg/attributes.py       | 408 ++++++++++++++--------
 soccer_xg/calibration.py      | 110 +-----
 soccer_xg/data/hdf.py         |   2 +-
 soccer_xg/metrics.py          | 179 ++++++++--
 soccer_xg/ml/logreg.py        |  82 ++---
 soccer_xg/ml/mlp.py           |  57 +---
 soccer_xg/ml/pipeline.py      |  21 ++
 soccer_xg/ml/preprocessing.py |   3 +-
 soccer_xg/ml/xgboost.py       |  60 +---
 soccer_xg/utils.py            |  29 +-
 soccer_xg/visualisation.py    | 512 ++++++++++++++++++++++------
 soccer_xg/xg.py               | 613 ++++++++++++++++++++++------------
 13 files changed, 1309 insertions(+), 791 deletions(-)

diff --git a/soccer_xg/__init__.py b/soccer_xg/__init__.py
index e17d960..569ab07 100644
--- a/soccer_xg/__init__.py
+++ b/soccer_xg/__init__.py
@@ -4,7 +4,27 @@
 :copyright: (c) 2023 by DTAI KU Leuven.
 :license: Apache v2, see LICENSE for more details.
 """
-from soccer_xg.xg import XGModel
+from soccer_xg.data import Dataset, HDFDataset, SQLDataset
+from soccer_xg.xg import (
+    DatasetTransformer,
+    XGModel,
+    PenaltyXGModel,
+    FreekickXGModel,
+    BasicOpenplayXGModel,
+    AdvancedOpenplayXGModel,
+    StatsBombOpenplayXGModel,
+)
 
 __version__ = '0.0.1'
-__all__ = ['XGModel']
+__all__ = [
+    'Dataset',
+    'HDFDataset',
+    'SQLDataset',
+    'DatasetTransformer',
+    'XGModel',
+    'PenaltyXGModel',
+    'FreekickXGModel',
+    'BasicOpenplayXGModel',
+    'AdvancedOpenplayXGModel',
+    'StatsBombOpenplayXGModel',
+]
diff --git a/soccer_xg/attributes.py b/soccer_xg/attributes.py
index 06e8d16..9dfe207 100644
--- a/soccer_xg/attributes.py
+++ b/soccer_xg/attributes.py
@@ -22,7 +22,7 @@
    a :class:`pandas.DataFrame` of events and a series with event IDs to select
    the shots for which attributes should be computed.
 
-The types are specified using the ``ftype`` decorator. Only functions, which
+The types are specified using the ``feature`` decorator. Only functions, which
 have a parameter called "ftype" are seen by soccer-xg as a generator. Others
 will not be calculated.
 
@@ -32,13 +32,13 @@
 """
 import math
 import warnings
+from typing import Callable
 
 import numpy as np
 import pandas as pd
-
-from socceraction import spadl
 import socceraction.spadl.config as spadlcfg
 import socceraction.vaep.features as fs
+from socceraction import spadl
 from socceraction.vaep.features import simple
 
 _spadl_cfg = {
@@ -56,14 +56,43 @@
     "circle_radius": 9.15,
 }
 
+# Typing
+Events = pd.DataFrame
+Actions = pd.DataFrame
+GameStates = list[Actions]
+GameStateAttributeGenerator = Callable[[GameStates], pd.DataFrame]
+ActionsAttributeGenerator = Callable[[Actions, pd.Series], pd.DataFrame]
+EventsAttributeGenerator = Callable[[Events, pd.Series], pd.DataFrame]
+AttributeGenerator = (
+    EventsAttributeGenerator | ActionsAttributeGenerator | GameStateAttributeGenerator
+)
 
-def ftype(value):
-    """
-    This method returns a decorator that sets the property key of the function to value
-    """
+
+# Decorators
+
+_FEATURE_REGISTRY = {}
+_LABEL_REGISTRY = {}
+
+def feature(ftype, features=None):
+    """Return a decorator that sets ``ftype`` on a generator and registers its feature names."""
 
     def decorate_func(func):
-        setattr(func, "ftype", value)
+        setattr(func, "ftype", ftype)
+        if features is not None:
+            for name in features:
+                _FEATURE_REGISTRY[name] = func
+        return func
+
+    return decorate_func
+
+def label(ftype, labels=None):
+    """Return a decorator that sets ``ftype`` on a generator and registers its label names."""
+
+    def decorate_func(func):
+        setattr(func, "ftype", ftype)
+        if labels is not None:
+            for name in labels:
+                _LABEL_REGISTRY[name] = func
         return func
 
     return decorate_func
@@ -73,50 +102,22 @@ def decorate_func(func):
 # SoccerAction-style gamestate attributes
 # ############################################################################
 
-actiontype = ftype("gamestates")(fs.actiontype)
-actiontype_onehot = ftype("gamestates")(fs.actiontype_onehot)
-result = ftype("gamestates")(fs.result)
-result_onehot = ftype("gamestates")(fs.result_onehot)
-actiontype_result_onehot = ftype("gamestates")(fs.actiontype_result_onehot)
-bodypart = ftype("gamestates")(fs.bodypart)
-bodypart_onehot = ftype("gamestates")(fs.bodypart_onehot)
-startlocation = ftype("gamestates")(fs.startlocation)
-endlocation = ftype("gamestates")(fs.endlocation)
-startpolar = ftype("gamestates")(fs.startpolar)
-endpolar = ftype("gamestates")(fs.endpolar)
-team = ftype("gamestates")(fs.team)
-movement = ftype("gamestates")(fs.movement)
-time_delta = ftype("gamestates")(fs.time_delta)
-space_delta = ftype("gamestates")(fs.space_delta)
-
-
-@ftype("gamestates")
-def speed(gamestates):
-    """Get the movement speed of the ball between the last and previous actions.
-
-    Parameters
-    ----------
-    gamestates : list(pd.DataFrame)
-        The game states of a game.
-
-    Returns
-    -------
-    pd.DataFrame
-        A dataframe with a column for the horizontal ('speedx_a0i'), vertical
-        ('speedy_a0i') and total ('speed_a0i') movement speed of the ball
-        between each <nb_prev_actions> action ai and action a0.
-    """
-    a0 = gamestates[0]
-    spaced = pd.DataFrame(index=a0.index)
-    for i, a in enumerate(gamestates[1:]):
-        dt = a0.time_seconds - a.time_seconds
-        dt[dt < 1] = 1
-        dx = a.end_x - a0.start_x
-        spaced["speedx_a0" + (str(i + 1))] = dx.abs() / dt
-        dy = a.end_y - a0.start_y
-        spaced["speedy_a0" + (str(i + 1))] = dy.abs() / dt
-        spaced["speed_a0" + (str(i + 1))] = np.sqrt(dx**2 + dy**2) / dt
-    return spaced
+actiontype = feature("gamestates")(fs.actiontype)
+actiontype_onehot = feature("gamestates")(fs.actiontype_onehot)
+result = feature("gamestates")(fs.result)
+result_onehot = feature("gamestates")(fs.result_onehot)
+actiontype_result_onehot = feature("gamestates")(fs.actiontype_result_onehot)
+bodypart = feature("gamestates", ["bodypart"])(fs.bodypart)
+bodypart_onehot = feature("gamestates")(fs.bodypart_onehot)
+startlocation = feature("gamestates")(fs.startlocation)
+endlocation = feature("gamestates")(fs.endlocation)
+startpolar = feature("gamestates")(fs.startpolar)
+endpolar = feature("gamestates")(fs.endpolar)
+team = feature("gamestates")(fs.team)
+movement = feature("gamestates")(fs.movement)
+time_delta = feature("gamestates")(fs.time_delta)
+space_delta = feature("gamestates")(fs.space_delta)
+speed = feature("gamestates")(fs.speed)
 
 
 # ############################################################################
@@ -124,7 +125,7 @@ def speed(gamestates):
 # ############################################################################
 
 
-@ftype("actions")
+@label("actions", ["goal"])
 def goal_from_shot(actions, shot_mask):
     """Determine whether a goal was scored from the current action.
 
@@ -150,7 +151,34 @@ def goal_from_shot(actions, shot_mask):
     return goaldf
 
 
-@ftype("actions")
+@feature("actions", ["type_name_shot"])
+def shot_type(actions, shot_mask):
+    """Return the SPADL action type of each selected shot.
+
+    Parameters
+    ----------
+    actions : pd.DataFrame
+        The actions of a game in SPADL format.
+    shot_mask : pd.Series
+        A boolean mask selecting the shots to compute the attribute for.
+
+    Returns
+    -------
+    pd.DataFrame
+        A dataframe indexed like the selected shots with one categorical
+        column 'type_name_shot' ('shot', 'shot_penalty' or 'shot_freekick').
+    """
+    shots = actions.loc[shot_mask]
+    shottypedf = pd.DataFrame(index=shots.index)
+    shottypedf["type_name_shot"] = pd.Categorical(
+        shots["type_name"],
+        categories=["shot", "shot_penalty", "shot_freekick"],
+        ordered=False,
+    )
+    return shottypedf
+
+
+@feature("actions", ["dist_shot"])
 def shot_dist(actions, shot_mask):
     """Compute the distance to the middle of the goal.
 
@@ -176,7 +204,7 @@ def shot_dist(actions, shot_mask):
     return distdf
 
 
-@ftype("actions")
+@feature("actions", ["dx_shot", "dy_shot"])
 def shot_location(actions, shot_mask):
     """Compute the distance to the mid line and goal line.
 
@@ -204,7 +232,7 @@ def shot_location(actions, shot_mask):
     return locationdf
 
 
-@ftype("actions")
+@feature("actions", ["angle_shot"])
 def shot_angle(actions, shot_mask):
     """Compute the angle to the middle of the goal.
 
@@ -234,7 +262,7 @@ def shot_angle(actions, shot_mask):
     return polardf
 
 
-@ftype("actions")
+@feature("actions", ["visible_angle_shot"])
 def shot_visible_angle(actions, shot_mask):
     """Compute the angle formed between the shot location and the two goal posts.
 
@@ -280,7 +308,7 @@ def shot_visible_angle(actions, shot_mask):
     return angledf
 
 
-@ftype("actions")
+@feature("actions", ["relative_angle_shot"])
 def shot_relative_angle(actions, shot_mask):
     """Compute the relative angle to goal.
 
@@ -326,7 +354,7 @@ def shot_relative_angle(actions, shot_mask):
     return angledf[["relative_angle_shot"]]
 
 
-@ftype("actions")
+@feature("actions", ["bodypart_name_shot"])
 def shot_bodypart(actions, shot_mask):
     """Return the body part used to take the shot.
 
@@ -354,7 +382,7 @@ def shot_bodypart(actions, shot_mask):
     return bodypartdf
 
 
-@ftype("actions")
+@feature("actions", ["detailed_bodypart_name_shot"])
 def shot_bodypart_detailed(actions, shot_mask):
     """Return the body part used to take the shot, distinguishing between
     the left and right foot.
@@ -375,13 +403,13 @@ def shot_bodypart_detailed(actions, shot_mask):
     """
     shots = actions.loc[shot_mask]
     bodypartdf = pd.DataFrame(index=shots.index)
-    bodypartdf["bodypart_name_shot"] = pd.Categorical(
+    bodypartdf["detailed_bodypart_name_shot"] = pd.Categorical(
         shots["bodypart_name"], categories=spadlcfg.bodyparts, ordered=False
     )
     return bodypartdf
 
 
-@ftype("actions")
+@feature("actions", [f"bodypart_{type_name}_shot" for type_name in spadlcfg.bodyparts])
 def shot_bodypart_onehot(actions, shot_mask):
     """Return the one-hot encoded body part used to take the shot.
 
@@ -412,7 +440,7 @@ def shot_bodypart_onehot(actions, shot_mask):
     return X
 
 
-@ftype("actions")
+@feature("actions", ["post_dribble", "carry_length"])
 def post_dribble(actions, shot_mask):
     """Compute features describing the dribble before the shot.
 
@@ -442,15 +470,16 @@ def post_dribble(actions, shot_mask):
     for idx in actions.loc[shot_mask].index:
         carry_length = 0
         maybe_carry = actions.loc[:idx].iloc[-1]
-        if maybe_carry.type_name == "dribble":
+        post_dribble = maybe_carry.type_name == "dribble"
+        if post_dribble:
             dx = maybe_carry.end_x - maybe_carry.start_x
             dy = maybe_carry.end_y - maybe_carry.start_y
             carry_length = math.sqrt(dx**2 + dy**2)
-        df[idx] = {"carry_length": carry_length}
+        df[idx] = {"carry_length": carry_length, "post_dribble": post_dribble}
     return pd.DataFrame.from_dict(df, orient="index")
 
 
-@ftype("actions")
+@feature("actions", ["type_name_assist"])
 def assist_type(actions, shot_mask):
     """Return the assist type.
 
@@ -504,11 +533,11 @@ def assist_type(actions, shot_mask):
         assist_type = assist.type_name if assist else "direct"
         # TODO (assist_technique): The technique for crosses one of straight,
         # inswinging, or out swinging and whether the pass was a through ball
-        df[idx] = {"type_assist": assist_type}
+        df[idx] = {"type_name_assist": assist_type}
     return pd.DataFrame.from_dict(df, orient="index")
 
 
-@ftype("actions")
+@feature("actions", ["fastbreak"])
 def fastbreak(actions, shot_mask):
     """Get whether the shot was part of a counter attack.
 
@@ -559,7 +588,7 @@ def fastbreak(actions, shot_mask):
     return pd.DataFrame.from_dict(df, orient="index")
 
 
-@ftype("actions")
+@feature("actions", ["rebound", "time_prev_shot"])
 def rebound(actions, shot_mask):
     """Get whether the shot was a rebound.
 
@@ -718,6 +747,33 @@ def fn(point):
 
 
 def triangular_grid(name, angle_bins, dist_bins, symmetrical=False):
+    """Get the location of a shot as a cell index from a triangular grid.
+
+    Parameters
+    ----------
+    name : str
+        Name of the feature.
+    angle_bins : list
+        A monotonically increasing array of bin edges for the angle of the
+        shot wrt the center of the goal, including the rightmost edge. In degrees.
+    dist_bins : list
+        A monotonically increasing array of bin edges for the distance of the
+        shot to the center of the goal, including the rightmost edge. In meters.
+    symmetrical : bool
+        Whether to use a symmetrical grid. Default: False.
+
+    Examples
+    --------
+    >>> shot_zone = feature("gamestates", ["shot_zone"])(
+    ...     triangular_grid(
+    ...         "shot_zone",
+    ...         [-50, -20, 20, 50],
+    ...         [2, 4, 8, 11, 16, 24, 34, 50],
+    ...         symmetrical=True,
+    ...     )
+    ... )
+    """
+
     @simple
     def fn(actions):
         zonedf = startpolar(actions)
@@ -741,12 +797,39 @@ def fn(actions):
     return fn
 
 
-def rectangular_grid(name, x_bins, y_bins, symmetrical=False, cfg=_spadl_cfg):
+def rectangular_grid(name, x_bins, y_bins, symmetrical=False):
+    """Get the location of a shot as a cell index from a rectangular grid.
+
+    Parameters
+    ----------
+    name : str
+        Name of the feature.
+    x_bins : list
+        A monotonically increasing array of bin edges for the length of the
+        pitch, including the rightmost edge. In meters.
+    y_bins : list
+        A monotonically increasing array of bin edges for the width of the
+        pitch, including the rightmost edge. In meters.
+    symmetrical : bool
+        Whether to use a symmetrical grid. Default: False.
+
+    Examples
+    --------
+    >>> shot_zone = feature("gamestates", ["shot_zone"])(
+    ...     rectangular_grid(
+    ...         "shot_zone",
+    ...         np.arange(0, 105, 2),
+    ...         np.arange(0, 68, 2),
+    ...         symmetrical=False,
+    ...     )
+    ... )
+    """
+
     @simple
     def fn(actions):
         zonedf = actions[["start_x", "start_y"]].copy()
         if symmetrical:
-            m = (cfg["origin_y"] + cfg["width"]) / 2
+            m = (_spadl_cfg["origin_y"] + _spadl_cfg["width"]) / 2
             zonedf.loc[zonedf.start_y > m, "start_y"] -= m
         x_bin = np.digitize(zonedf.start_x, x_bins)
         y_bin = np.digitize(zonedf.start_y, y_bins)
@@ -762,6 +845,26 @@ def fn(actions):
 
 
 def custom_grid(name, zones, is_in_zone):
+    """Get the location of a shot as a cell index from a custom grid.
+
+    Parameters
+    ----------
+    name : str
+        Name of the feature.
+    zones : list
+    is_in_zone : callable
+
+    Examples
+    --------
+    >>> shot_zone = feature("gamestates", ["shot_zone"])(
+    ...     custom_grid(
+    ...         "shot_zone",
+    ...         _caley_shot_matrix(),
+    ...         _point_in_rect,
+    ...     )
+    ... )
+    """
+
     @simple
     def fn(actions):
         zonedf = actions[["start_x", "start_y"]].copy()
@@ -786,7 +889,7 @@ def fn(actions):
     return fn
 
 
-caley_grid = ftype("gamestates")(custom_grid("caley_zone", _caley_shot_matrix(), _point_in_rect))
+caley_grid = feature("gamestates", ["caley_zone"])(custom_grid("caley_zone", _caley_shot_matrix(), _point_in_rect))
 
 # ############################################################################
 # StatsBomb-specific attributes
@@ -857,7 +960,7 @@ def _is_inside_triangle(point, tri_points):
     return False
 
 
-@ftype("events")
+@feature("events", ["sb_open_goal"])
 def statsbomb_open_goal(events, shot_mask):
     """Get whether the shot was taken into an open goal.
 
@@ -879,13 +982,13 @@ def statsbomb_open_goal(events, shot_mask):
     output = {}
     for idx, shot in events.loc[shot_mask].iterrows():
         if "shot" in shot.extra:
-            output[idx] = {"open_goal": "open_goal" in shot.extra['shot']}
+            output[idx] = {"sb_open_goal": "open_goal" in shot.extra['shot']}
 
     output = pd.DataFrame.from_dict(output, orient="index")
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_first_touch"])
 def statsbomb_first_touch(events, shot_mask):
     """Get whether the shot was a first-touch shot.
 
@@ -907,13 +1010,13 @@ def statsbomb_first_touch(events, shot_mask):
     output = {}
     for idx, shot in events.loc[shot_mask].iterrows():
         if "shot" in shot.extra:
-            output[idx] = {"first_touch": "first_time" in shot.extra['shot']}
+            output[idx] = {"sb_first_touch": "first_time" in shot.extra['shot']}
 
     output = pd.DataFrame.from_dict(output, orient="index")
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_free_projection_gaps", "sb_free_projection_pct"])
 def statsbomb_free_projection(events, shot_mask):
     """Get the free projection area.
 
@@ -985,14 +1088,14 @@ def statsbomb_free_projection(events, shot_mask):
                         new_free_projection.append(projection)
                 free_projection = [p for p in new_free_projection if p[1] - p[0] > 0]
         output[idx] = {
-            "free_projection_gaps": len(free_projection),
-            "free_projection_pct": np.sum(np.diff(free_projection)) / np.diff(goal)[0],
+            "sb_free_projection_gaps": len(free_projection),
+            "sb_free_projection_pct": np.sum(np.diff(free_projection)) / np.diff(goal)[0],
         }
     output = pd.DataFrame.from_dict(output, orient="index")
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_goalkeeper_x", "sb_goalkeeper_y", "sb_goalkeeper_dist_to_ball", "sb_goalkeeper_dist_to_goal", "sb_goalkeeper_angle_to_goal"])
 def statsbomb_goalkeeper_position(events, shot_mask):
     """Get the goalkeeper's position.
 
@@ -1049,17 +1152,17 @@ def statsbomb_goalkeeper_position(events, shot_mask):
         goalkeeper_dist_to_ball = math.sqrt(dx_kb**2 + dy_kb**2)
 
         output[idx] = {
-            "goalkeeper_x": goalkeeper_x,
-            "goalkeeper_y": goalkeeper_y,
-            "goalkeeper_dist_to_ball": goalkeeper_dist_to_ball,
-            "goalkeeper_dist_to_goal": goalkeeper_dist_to_goal,
-            "goalkeeper_angle_to_goal": goalkeeper_angle_to_goal,
+            "sb_goalkeeper_x": goalkeeper_x,
+            "sb_goalkeeper_y": goalkeeper_y,
+            "sb_goalkeeper_dist_to_ball": goalkeeper_dist_to_ball,
+            "sb_goalkeeper_dist_to_goal": goalkeeper_dist_to_goal,
+            "sb_goalkeeper_angle_to_goal": goalkeeper_angle_to_goal,
         }
     output = pd.DataFrame.from_dict(output, orient="index")
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_dist_to_defender", "sb_under_pressure", "sb_nb_defenders_in_shot_line", "sb_nb_defenders_behind_ball", "sb_one_on_one"])
 def statsbomb_defenders_position(events, shot_mask):
     """Get features describing the position of the defending players.
 
@@ -1120,23 +1223,23 @@ def statsbomb_defenders_position(events, shot_mask):
             )
             behind_ball.append(defender_x > ball_x)
         output[idx] = {
-            "dist_to_defender": min(distances, default=float("inf")),
-            "under_pressure": shot.under_pressure,
-            "nb_defenders_in_shot_line": sum(in_shot_line),
-            "nb_defenders_behind_ball": sum(behind_ball),
-            "one_on_one": (
+            "sb_dist_to_defender": min(distances, default=float("inf")),
+            "sb_under_pressure": shot.under_pressure,
+            "sb_nb_defenders_in_shot_line": sum(in_shot_line),
+            "sb_nb_defenders_behind_ball": sum(behind_ball),
+            "sb_one_on_one": (
                 sum(behind_ball) == 0
                 and sum(in_shot_line) == 0
                 and shot.extra["shot"]["body_part"]["name"] in ["Left Foot", "Right Foot"]
             ),
         }
     output = pd.DataFrame.from_dict(output, orient="index")
-    output["one_on_one"] = output["one_on_one"].astype('boolean')
-    output["under_pressure"] = output["under_pressure"].astype('boolean')
+    output["sb_one_on_one"] = output["sb_one_on_one"].astype('boolean')
+    output["sb_under_pressure"] = output["sb_under_pressure"].astype('boolean')
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_end_x_assist", "sb_end_y_assist", "sb_carry_dist", "sb_type_name_assist", "sb_height_assist"])
 def statsbomb_assist(events, shot_mask):  # noqa: C901
     """Get features describing the assist.
 
@@ -1145,7 +1248,7 @@ def statsbomb_assist(events, shot_mask):  # noqa: C901
         - end_y_assist: The assisting pass' y-coordinate
         - carry_dist: The distance between the end location of the assisting
           pass and the location of the shot.
-        - type_assist: The assist type, which is one of 'standard_pass',
+        - type_name_assist: The assist type, which is one of 'standard_pass',
           'free_kick', 'corner', 'throw_in', 'cross', 'cut_back' or 'through_ball'.
         - height_assist: The peak height of the assisting pass, which is one of
           'ground', 'low' (under shoulder level) or 'high' (above shoulder
@@ -1210,16 +1313,16 @@ def statsbomb_assist(events, shot_mask):  # noqa: C901
                 assist_height = m[assist.extra["pass"]["height"]["name"]]
 
         output[event_id] = {
-            "end_x_assist": assist_x,
-            "end_y_assist": assist_y,
-            "carry_dist": math.sqrt((shot_x - assist_x) ** 2 + (shot_y - assist_y) ** 2),
-            "type_assist": assist_type,
-            "height_assist": assist_height,
+            "sb_end_x_assist": assist_x,
+            "sb_end_y_assist": assist_y,
+            "sb_carry_dist": math.sqrt((shot_x - assist_x) ** 2 + (shot_y - assist_y) ** 2),
+            "sb_type_name_assist": assist_type,
+            "sb_height_assist": assist_height,
         }
 
     output = pd.DataFrame.from_dict(output, orient="index")
-    output["type_assist"] = pd.Categorical(
-        output["type_assist"],
+    output["sb_type_name_assist"] = pd.Categorical(  # convert the registered column in place
+        output["sb_type_name_assist"],
         categories=[
             "standard_pass",
             "free_kick",
@@ -1231,13 +1334,13 @@ def statsbomb_assist(events, shot_mask):  # noqa: C901
         ],
         ordered=False,
     )
-    output["height_assist"] = pd.Categorical(
-        output["height_assist"], categories=["ground", "low", "high"], ordered=True
+    output["sb_height_assist"] = pd.Categorical(
+        output["sb_height_assist"], categories=["ground", "low", "high"], ordered=True
     )
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_from_counterattack"])
 def statsbomb_counterattack(events, shot_mask):
     """Get whether a shot was from a counterattack.
 
@@ -1259,14 +1362,14 @@ def statsbomb_counterattack(events, shot_mask):
     output = {}
     for idx, shot in events.loc[shot_mask].iterrows():
         output[idx] = {
-            "from_counterattack": shot.play_pattern_name == "From Counter",
+            "sb_from_counterattack": shot.play_pattern_name == "From Counter",
         }
 
     output = pd.DataFrame.from_dict(output, orient="index")
     return output
 
 
-@ftype("events")
+@feature("events", ["sb_impact_height"])
 def statsbomb_shot_impact_height(events, shot_mask):
     """Get the height of the ball when the shot was taken.
 
@@ -1307,41 +1410,23 @@ def statsbomb_shot_impact_height(events, shot_mask):
             height = "low"
         elif shot.extra["shot"]["technique"]["name"] == "Overhead Kick":
             height = "high"
-        output[idx] = {"impact_height": height}
+        output[idx] = {"sb_impact_height": height}
 
     output = pd.DataFrame.from_dict(output, orient="index")
-    output["impact_height"] = pd.Categorical(
-        output["impact_height"], categories=["ground", "low", "high"], ordered=True
+    output["sb_impact_height"] = pd.Categorical(
+        output["sb_impact_height"], categories=["ground", "low", "high"], ordered=True
     )
     return output
 
 
-default_features = [
-    actiontype,
-    bodypart,
-    result,
-    startlocation,
-    endlocation,
-    movement,
-    space_delta,
-    startpolar,
-    endpolar,
-    team,
-    time_delta,
-    speed,
-    shot_angle,
-    caley_grid,
-    ftype("gamestates")(
-        triangular_grid(
-            "angle_zone",
-            [-50, -20, 20, 50],
-            [2, 4, 8, 11, 16, 24, 34, 50],
-            symmetrical=True,
-        )
-    ),
+default_features : list[AttributeGenerator | str] = [
+    shot_type,
+    shot_location,
+    shot_dist,
+    shot_visible_angle,
 ]
 
-default_labels = [goal_from_shot]
+default_labels : list[AttributeGenerator | str] = [goal_from_shot]
 
 
 def compute_attributes(
@@ -1353,7 +1438,7 @@ def compute_attributes(
     shotfilter=None,
     nb_prev_actions=3,
 ):
-    """Extract xG features for a given game.
+    """Extract xG features and labels for a given game.
 
     Parameters
     ----------
@@ -1364,9 +1449,9 @@ def compute_attributes(
     events: pd.DataFrame
         A DataFrame containing the raw provider-specific events corresponding
         to ``actions``. Can be used to calculate provider-specific features.
-    xfns : list(callable)
+    xfns : list(callable or str)
         List of feature generators to apply. Defaults to ``default_features``.
-    yfns : list(callable)
+    yfns : list(callable or str)
         List of label generators to apply. Defaults to ``default_labels``.
     shotfilter: callable(pd.Series) -> bool
         A function that takes a shot (in SPADL format) and returns True if the
@@ -1396,11 +1481,11 @@ def compute_attributes(
     shot_events_idx = actions.loc[shot_mask, "original_event_id"]
 
     # handle inputs with no shots or no attributes
-    if shot_mask.sum() < 1:
+    if shot_mask.sum() == 0:
         # TODO: create the expected columns
-        return pd.DataFrame()
-    if len(xfns + yfns) < 1:
-        return pd.DataFrame(index=shot_actions_idx)
+        return pd.DataFrame(index=pd.Index([], name="action_id")), pd.DataFrame(index=pd.Index([], name="action_id"))
+    if len(xfns + yfns) == 0:
+        return pd.DataFrame(index=shot_actions_idx), pd.DataFrame(index=shot_actions_idx)
 
     # convert actions to ltr orientation
     actions_ltr = spadl.utils.play_left_to_right(actions, game.home_team_id)
@@ -1414,17 +1499,37 @@ def compute_attributes(
     shot_gamestates_ltr[0]["result_id"] = float("NaN")
 
     # compute features and labels
-    def _apply_fns(fns):
+    def _apply_fns(fns, registry):
         attrs = []
+        _fns = {}
         for fn in fns:
+            if isinstance(fn, str):
+                if fn not in registry:
+                    raise ValueError(
+                        f"Unknown feature '{fn}'. Valid feature names are [{', '.join(registry)}]"
+                    )
+                _fn = registry[fn]
+                # A None entry means the generator was also passed as a callable
+                # and all of its columns are kept; only extend real column lists.
+                if _fns.get(_fn, []) is not None:
+                    _fns.setdefault(_fn, []).append(fn)
+            else:
+                _fns[fn] = None
+        for fn, cols in _fns.items():
+            new_attrs = None
             if getattr(fn, "ftype", None) == "gamestates":
-                attrs.append(fn(shot_gamestates_ltr).set_index(shot_events_idx))
+                new_attrs = fn(shot_gamestates_ltr).set_index(shot_events_idx)
             elif getattr(fn, "ftype", None) == "actions":
-                attrs.append(fn(actions_ltr, shot_mask).set_index(shot_events_idx))
+                new_attrs = fn(actions_ltr, shot_mask).set_index(shot_events_idx)
             elif getattr(fn, "ftype", None) == "events":
-                attrs.append(fn(events, shot_events_idx))
+                new_attrs = fn(events, shot_events_idx)
             else:
-                warnings.warn("Unknown attribute type for {}.".format(fn.__name__), stacklevel=2)
+                warnings.warn(f"Unknown attribute type for {fn.__name__}.", stacklevel=2)
+            if new_attrs is not None:
+                if cols is not None:
+                    attrs.append(new_attrs[cols])
+                else:
+                    attrs.append(new_attrs)
         attrs = pd.concat(attrs, axis=1).loc[shot_events_idx].set_index(shot_actions_idx)
         attrs.index.name = "action_id"
         # fill missing values
@@ -1434,7 +1539,14 @@ def _apply_fns(fns):
         attrs.rename(columns=lambda s: s.replace("a0", "shot"), inplace=True)
         return attrs
 
-    X = _apply_fns(xfns)
-    y = _apply_fns(yfns)
+    X = _apply_fns(xfns, _FEATURE_REGISTRY) if len(xfns) > 0 else None
+    y = _apply_fns(yfns, _LABEL_REGISTRY) if len(yfns) > 0 else None
+
+    if X is None:
+        assert y is not None
+        X = pd.DataFrame(index=y.index)
+    if y is None:
+        assert X is not None
+        y = pd.DataFrame(index=X.index)
 
     return X, y
diff --git a/soccer_xg/calibration.py b/soccer_xg/calibration.py
index 7612baf..941aaf6 100644
--- a/soccer_xg/calibration.py
+++ b/soccer_xg/calibration.py
@@ -7,9 +7,9 @@
 from sklearn.isotonic import IsotonicRegression
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import check_cv
-from sklearn.preprocessing import LabelBinarizer, label_binarize
+from sklearn.preprocessing import LabelBinarizer
 from sklearn.svm import LinearSVC
-from sklearn.utils import check_X_y, column_or_1d, indexable, check_consistent_length
+from sklearn.utils import column_or_1d, indexable
 from sklearn.utils.validation import check_is_fitted
 
 
@@ -93,7 +93,7 @@ def __init__(self, base_estimator=None, method=None, cv=3, score_type=None):
         self.score_type = score_type
 
     def fit(self, X, y, sample_weight=None):
-        """Fit the calibrated model
+        """Fit the calibrated model.
 
         Parameters
         ----------
@@ -187,7 +187,7 @@ def fit(self, X, y, sample_weight=None):
         return self
 
     def predict_proba(self, X):
-        """Posterior probabilities of classification
+        """Posterior probabilities of classification.
 
         This function returns posterior probabilities of classification
         according to each class on an array of test vectors X.
@@ -217,7 +217,7 @@ def predict_proba(self, X):
         return mean_proba
 
     def calibrate_scores(self, df):
-        """Posterior probabilities of classification
+        """Posterior probabilities of classification.
 
         This function returns posterior probabilities of classification
         according to each class on an array of test vectors X.
@@ -246,8 +246,9 @@ def calibrate_scores(self, df):
         return mean_proba
 
     def predict(self, X):
-        """Predict the target of new samples. Can be different from the
-        prediction of the uncalibrated classifier.
+        """Predict the target of new samples.
+
+        Can be different from the prediction of the uncalibrated classifier.
 
         Parameters
         ----------
@@ -336,7 +337,7 @@ def _preproc(self, X):
         return df, idx_pos_class
 
     def fit(self, X, y, sample_weight=None):
-        """Calibrate the fitted model
+        """Calibrate the fitted model.
 
         Parameters
         ----------
@@ -425,7 +426,7 @@ def predict_proba(self, X):
         return proba
 
     def calibrate_scores(self, df):
-        """Posterior probabilities of classification
+        """Posterior probabilities of classification.
 
         This function returns posterior probabilities of classification
         according to each class on an array of test vectors X.
@@ -522,15 +523,15 @@ def predict(self, T):
 
 
 class _DummyCalibration(BaseEstimator, RegressorMixin):
-    """Dummy regression model. The purpose of this class is to give
+    """Dummy regression model.
+
+    The purpose of this class is to give
     the CalibratedClassifierCV class the option to just return the
     probabilities of the base classifier.
-
-
     """
 
     def fit(self, X, y, sample_weight=None):
-        """Does nothing.
+        """Do nothing.
 
         Parameters
         ----------
@@ -564,86 +565,3 @@ def predict(self, T):
             The predicted data.
         """
         return T
-
-
-def calibration_curve(y_true, y_prob, normalize=False, n_bins=5):
-    """Compute true and predicted probabilities for a calibration curve.
-
-    Read more in the :ref:`User Guide <calibration>`.
-
-    Parameters
-    ----------
-    y_true : array, shape (n_samples,)
-        True targets.
-
-    y_prob : array, shape (n_samples,)
-        Probabilities of the positive class.
-
-    normalize : bool, optional, default=False
-        Whether y_prob needs to be normalized into the bin [0, 1], i.e. is not
-        a proper probability. If True, the smallest value in y_prob is mapped
-        onto 0 and the largest one onto 1.
-
-    n_bins : int
-        Number of bins. A bigger number requires more data.
-
-    Returns
-    -------
-    prob_true : array, shape (n_bins,)
-        The true probability in each bin (fraction of positives).
-
-    prob_pred : array, shape (n_bins,)
-        The mean predicted probability in each bin.
-
-    References
-    ----------
-    Alexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good
-    Probabilities With Supervised Learning, in Proceedings of the 22nd
-    International Conference on Machine Learning (ICML).
-    See section 4 (Qualitative Analysis of Predictions).
-    """
-    y_true = column_or_1d(y_true)
-    y_prob = column_or_1d(y_prob)
-
-    if normalize:  # Normalize predicted values into interval [0, 1]
-        y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min())
-    elif y_prob.min() < 0 or y_prob.max() > 1:
-        raise ValueError('y_prob has values outside [0, 1] and normalize is ' 'set to False.')
-
-    y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
-
-    bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)
-    binids = np.digitize(y_prob, bins) - 1
-
-    bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins))
-    bin_true = np.bincount(binids, weights=y_true, minlength=len(bins))
-    bin_total = np.bincount(binids, minlength=len(bins))
-
-    zero = bin_total == 0
-    bin_total[zero] = 2
-    # nonzero = bin_total != 0
-
-    prob_true = bin_true / bin_total
-    prob_pred = bin_sums / bin_total
-
-    return prob_true, prob_pred
-
-
-def _check_binary_probabilistic_predictions(y_true, y_prob):
-    """Check that y_true is binary and y_prob contains valid probabilities"""
-    check_consistent_length(y_true, y_prob)
-
-    labels = np.unique(y_true)
-
-    if len(labels) != 2:
-        raise ValueError(
-            'Only binary classification is supported. ' 'Provided labels %s.' % labels
-        )
-
-    if y_prob.max() > 1:
-        raise ValueError('y_prob contains values greater than 1.')
-
-    if y_prob.min() < 0:
-        raise ValueError('y_prob contains values less than 0.')
-
-    return label_binarize(y_true, classes=labels)[:, 0]
diff --git a/soccer_xg/data/hdf.py b/soccer_xg/data/hdf.py
index a6fb859..1ffb22f 100644
--- a/soccer_xg/data/hdf.py
+++ b/soccer_xg/data/hdf.py
@@ -133,7 +133,7 @@ def players(self, game_id: Optional[int] = None) -> pd.DataFrame:
                 raise IndexError(f"No game found with ID={game_id}")
         else:
             players = self["player_games"]
-            cols = ["team_id", "player_id", "player_name", "nickname"]
+            cols = ["team_id", "player_id", "player_name"]
             return players[cols].drop_duplicates().set_index(["player_id"])
 
     def events(self, game_id: int) -> pd.DataFrame:
diff --git a/soccer_xg/metrics.py b/soccer_xg/metrics.py
index 12dc49e..e4d56bb 100644
--- a/soccer_xg/metrics.py
+++ b/soccer_xg/metrics.py
@@ -1,7 +1,38 @@
 """A collection of metrics for evaluation xG models."""
+import warnings
+
 import numpy as np
 from scipy import integrate
 from sklearn.neighbors import KernelDensity
+from sklearn.preprocessing import label_binarize
+from sklearn.utils import check_consistent_length, column_or_1d
+
+
+def _check_binary_probabilistic_predictions(y_true, y_prob):
+    """Validate inputs; return binarized y_true and y_prob clipped to [0, 1]."""
+    # convert to 1D numpy array
+    y_true = column_or_1d(y_true)
+    y_prob = column_or_1d(y_prob)
+
+    # check equal length
+    check_consistent_length(y_true, y_prob)
+
+    if y_prob.min() < 0 or y_prob.max() > 1:
+        warnings.warn(
+            "y_prob has values outside [0, 1]. "
+            "Probabilities outside [0, 1] will be clipped."
+        )
+        y_prob = np.clip(y_prob, a_min=0, a_max=1)
+
+    # check if binary classification
+    labels = np.unique(y_true)
+    if len(labels) != 2:
+        raise ValueError(
+            'Only binary classification is supported. Provided labels %s.' % labels
+        )
+
+    return label_binarize(y_true, classes=labels)[:, 0], y_prob
+
 
 
 def expected_calibration_error(y_true, y_prob, n_bins=5, strategy='uniform'):
@@ -41,31 +72,21 @@ def expected_calibration_error(y_true, y_prob, n_bins=5, strategy='uniform'):
 
     References
     ----------
-    [1]: Chuan Guo, Geoff Pleiss, Yu Sun, Kilian Q. Weinberger,
-       On Calibration of Modern Neural Networks.
-       Proceedings of the 34th International Conference on Machine Learning
-       (ICML 2017).
-       arXiv:1706.04599
-       https://arxiv.org/pdf/1706.04599.pdf
-    [2]: Nixon, Jeremy, et al.,
-       Measuring calibration in deep learning.
-       arXiv:1904.01685
-       https://arxiv.org/abs/1904.01685
-
+    .. [1] Chuan Guo, Geoff Pleiss, Yu Sun, Kilian Q. Weinberger, On
+           Calibration of Modern Neural Networks. Proceedings of the 34th
+           International Conference on Machine Learning (ICML 2017).
+           arXiv:1706.04599 https://arxiv.org/pdf/1706.04599.pdf
+    .. [2] Nixon, Jeremy, et al., Measuring calibration in deep learning.
+           arXiv:1904.01685 https://arxiv.org/abs/1904.01685
     """
 
-    if y_prob.shape != y_true.shape:
-        raise ValueError('Shapes must match')
-    if y_prob.min() < 0 or y_prob.max() > 1:
-        raise ValueError('y_prob has values outside [0, 1].')
-    labels = np.unique(y_true)
-    if len(labels) > 2:
-        raise ValueError('Only binary classification is supported.')
+    y_true, y_prob = _check_binary_probabilistic_predictions(y_true, y_prob)
 
     if strategy == 'quantile':  # Determine bin edges by distribution of data
         quantiles = np.linspace(0, 1, n_bins + 1)
         bins = np.percentile(y_prob, quantiles * 100)
-        bins[-1] = bins[-1] + 1e-8
+        bins[0] = 0 - 1e-8
+        bins[-1] = 1 + 1e-8
     elif strategy == 'uniform':
         bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)
     else:
@@ -99,7 +120,93 @@ def _reliability(y_true, y_prob, bins):
     return accs, confs, counts
 
 
-def bayesian_calibration_curve(y_true, y_pred, n_bins=100):
+def calibration_curve(y_true, y_prob, bins=10, bin_strategy="uniform"):
+    """Compute true and predicted probabilities for a calibration curve.
+
+    Parameters
+    ----------
+    y_true : array (n_samples, )
+        Labels indicating the true class.
+    y_prob : array (n_samples, )
+        Output probability scores.
+    bins : int or array-like of floats
+        Number of bins to create in the scores' space, or list of bin
+        boundaries. More bins require more data.
+    bin_strategy : {'uniform', 'quantile'}, default='uniform'
+        Strategy used to define the widths of the bins.
+
+        uniform
+            The bins have identical widths.
+        quantile
+            The bins have the same number of samples and depend on `y_prob`.
+
+    Returns
+    -------
+    avg_true : array, shape (n_bins,)
+        Fraction of positives in each bin (NaN for empty bins).
+
+    avg_pred : array, shape (n_bins,)
+        Mean predicted probability in each bin (NaN for empty bins).
+
+    bin_true : array, shape (n_bins,)
+        Number of positive samples in each bin.
+
+    bin_total : array, shape (n_bins,)
+        Number of samples in each bin.
+
+    References
+    ----------
+    .. [1] Alexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good
+           Probabilities With Supervised Learning, in Proceedings of the 22nd
+           International Conference on Machine Learning (ICML).
+           See section 4 (Qualitative Analysis of Predictions).
+    """
+    y_true, y_prob = _check_binary_probabilistic_predictions(y_true, y_prob)
+
+    if isinstance(bins, (int, np.integer)):
+        n_bins = bins
+        if bin_strategy == "quantile":  # Determine bin edges by distribution of data
+            quantiles = np.linspace(0, 1, n_bins + 1)
+            bins = np.percentile(y_prob, quantiles * 100)
+            bins[0] = 0 - 1e-8
+            bins[-1] = 1 + 1e-8
+        elif bin_strategy == "uniform":
+            bins = np.linspace(0, 1 + 1e-8, n_bins + 1)
+        else:
+            raise ValueError(
+                "Invalid entry to 'bin_strategy' input. Strategy "
+                "must be either 'quantile' or 'uniform'."
+            )
+    elif isinstance(bins, (list, tuple, np.ndarray)):
+        n_bins = len(bins) - 1
+        # Float copy: integer edges would truncate the epsilon nudges below.
+        bins = np.array(bins, dtype=float)
+        if bins[0] == 0.0:
+            bins[0] = 0 - 1e-8
+        if bins[-1] == 1.0:
+            bins[-1] = 1 + 1e-8
+    else:
+        raise ValueError(
+            "Invalid entry to 'bins' input. They must be either "
+            "a list of bin boundaries or the number of bins."
+        )
+
+    bin_idx = np.digitize(y_prob, bins) - 1
+
+    bin_true = np.bincount(bin_idx, weights=y_true, minlength=n_bins)
+    bin_pred = np.bincount(bin_idx, weights=y_prob, minlength=n_bins)
+    bin_total = np.bincount(bin_idx, minlength=n_bins)
+
+    zero_idx = bin_total == 0
+    # Empty bins stay NaN rather than triggering a division by zero.
+    avg_true = np.full(bin_total.shape[0], np.nan)
+    avg_true[~zero_idx] = np.divide(bin_true[~zero_idx], bin_total[~zero_idx])
+    avg_pred = np.full(bin_total.shape[0], np.nan)
+    avg_pred[~zero_idx] = np.divide(bin_pred[~zero_idx], bin_total[~zero_idx])
+    return avg_true, avg_pred, bin_true, bin_total
+
+
+def bayesian_calibration_curve(y_true, y_prob, n_bins=100):
     """Compute true and predicted probabilities for a calibration curve using
     kernel density estimation instead of bins with a fixed width.
 
@@ -115,21 +222,24 @@ def bayesian_calibration_curve(y_true, y_pred, n_bins=100):
 
     Returns
     -------
-    prob_true : ndarray of shape (n_bins,)
-        The proportion of samples whose class is the positive class, in each
-        bin (fraction of positives).
-    prob_pred : ndarray of shape (n_bins,)
+    avg_true : array, shape (n_bins,)
+        The true probability in each bin (fraction of positives).
+
+    avg_pred : array, shape (n_bins,)
         The mean predicted probability in each bin.
-    number_total : ndarray of shape (n_bins,)
-        The number of examples in each bin.
+
+    bin_true : array, shape (n_bins,)
+        Number of true samples in each bin.
+
+    bin_total : array, shape (n_bins,)
+        Number of samples in each bin.
     """
-    y_pred = np.array(y_pred)
-    y_true = np.array(y_true, dtype=bool)
+    y_true, y_prob = _check_binary_probabilistic_predictions(y_true, y_prob)
+    y_true = y_true.astype(bool)
+
     bandwidth = 1 / n_bins
-    kde_pos = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(
-        (y_pred[y_true])[:, np.newaxis]
-    )
-    kde_total = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(y_pred[:, np.newaxis])
+    kde_pos = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit((y_prob[y_true])[:, np.newaxis])
+    kde_total = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(y_prob[:, np.newaxis])
     sample_probabilities = np.linspace(0.01, 0.99, 99)
     number_density_offense_won = np.exp(
         kde_pos.score_samples(sample_probabilities[:, np.newaxis])
@@ -142,8 +252,9 @@ def bayesian_calibration_curve(y_true, y_pred, n_bins=100):
     predicted_pos_percents = np.nan_to_num(number_pos / number_total, 1)
 
     return (
-        100.0 * sample_probabilities,
-        100.0 * predicted_pos_percents,
+        predicted_pos_percents,
+        sample_probabilities,
+        number_pos,
         number_total,
     )
 
diff --git a/soccer_xg/ml/logreg.py b/soccer_xg/ml/logreg.py
index 5c72518..d0a4b98 100644
--- a/soccer_xg/ml/logreg.py
+++ b/soccer_xg/ml/logreg.py
@@ -6,70 +6,32 @@
 
 
 def logreg_gridsearch_classifier(
-    numeric_features,
-    categoric_features,
-    learning_rate=0.08,
-    use_dask=False,
-    n_iter=100,
-    scoring='roc_auc',
+    numeric_features: list[str],
+    categoric_features: list[str],
+    learning_rate: float | str | None = 0.08,
+    use_dask: bool = False,
+    n_iter: int = 100,
+    scoring: str = 'roc_auc',
 ):
-    """
-    Simple classification pipeline using hyperband to optimize logreg hyper-parameters
-    Parameters
-    ----------
-    `numeric_features` : The list of numeric features
-    `categoric_features` : The list of categoric features
-    `learning_rate` : The learning rate
-    """
+    """Simple classification pipeline using random search to optimize logreg hyper-parameters.
 
-    return _logreg_gridsearch_model(
-        'classification',
-        numeric_features,
-        categoric_features,
-        learning_rate,
-        use_dask,
-        n_iter,
-        scoring,
-    )
-
-
-def logreg_gridsearch_regressor(
-    numeric_features,
-    categoric_features,
-    learning_rate=0.08,
-    use_dask=False,
-    n_iter=100,
-    scoring='roc_auc',
-):
-    """
-    Simple regression pipeline using hyperband to optimize logreg hyper-parameters
     Parameters
     ----------
-    `numeric_features` : The list of numeric features
-    `categoric_features` : The list of categoric features
-    `learning_rate` : The learning rate
-    """
-
-    return _logreg_gridsearch_model(
-        'regression',
-        numeric_features,
-        categoric_features,
-        learning_rate,
-        use_dask,
-        n_iter,
-        scoring,
-    )
+    numeric_features : list(str)
+        The list of numeric features
+    categoric_features : list(str)
+        The list of categoric features
+    learning_rate : float, str or None
+        The learning rate. If None, a sklearn.linear_model.LogisticRegression is used.
+        Otherwise, a sklearn.linear_model.SGDClassifier is used.
+    use_dask : bool
+        If True, use dask for parallelizing a grid search over the hyper-parameters.
 
-
-def _logreg_gridsearch_model(
-    task,
-    numeric_features,
-    categoric_features,
-    learning_rate,
-    use_dask,
-    n_iter,
-    scoring,
-):
+    Returns
+    -------
+    sklearn.model_selection.RandomizedSearchCV
+        The classifier with the best hyper-parameters found by the random search.
+    """
     if learning_rate is None:
         param_space = {
             'clf__C': np.logspace(-5, 5, 100),
@@ -87,7 +49,7 @@ def _logreg_gridsearch_model(
         model = SGDClassifier(
             learning_rate=learning_rate_schedule,
             eta0=eta0,
-            loss='log',
+            loss='log_loss',
             max_iter=10000,
             fit_intercept=False,
         )
diff --git a/soccer_xg/ml/mlp.py b/soccer_xg/ml/mlp.py
index 1a1ce35..03e4a3f 100644
--- a/soccer_xg/ml/mlp.py
+++ b/soccer_xg/ml/mlp.py
@@ -1,5 +1,5 @@
 from scipy.stats.distributions import randint, uniform
-from sklearn.neural_network import MLPClassifier, MLPRegressor
+from sklearn.neural_network import MLPClassifier
 from sklearn.pipeline import Pipeline
 
 from .preprocessing import simple_proc_for_linear_algoritms
@@ -21,55 +21,6 @@ def mlp_gridsearch_classifier(
     `categoric_features` : The list of categoric features
     `learning_rate` : The learning rate
     """
-
-    return _mlp_gridsearch_model(
-        'classification',
-        numeric_features,
-        categoric_features,
-        learning_rate,
-        use_dask,
-        n_iter,
-        scoring,
-    )
-
-
-def mlp_gridsearch_regressor(
-    numeric_features,
-    categoric_features,
-    learning_rate=0.08,
-    use_dask=False,
-    n_iter=100,
-    scoring='roc_auc',
-):
-    """
-    Simple regression pipeline using hyperband to optimize mlp hyper-parameters
-    Parameters
-    ----------
-    `numeric_features` : The list of numeric features
-    `categoric_features` : The list of categoric features
-    `learning_rate` : The learning rate
-    """
-
-    return _mlp_gridsearch_model(
-        'regression',
-        numeric_features,
-        categoric_features,
-        learning_rate,
-        use_dask,
-        n_iter,
-        scoring,
-    )
-
-
-def _mlp_gridsearch_model(
-    task,
-    numeric_features,
-    categoric_features,
-    learning_rate,
-    use_dask,
-    n_iter,
-    scoring,
-):
     param_space = {
         'clf__hidden_layer_sizes': [
             (24,),
@@ -84,11 +35,7 @@ def _mlp_gridsearch_model(
         'clf__learning_rate': ['constant', 'adaptive'],
     }
 
-    model = (
-        MLPClassifier(learning_rate_init=learning_rate)
-        if task == 'classification'
-        else MLPRegressor(learning_rate_init=learning_rate)
-    )
+    model = MLPClassifier(learning_rate_init=learning_rate)
 
     pipe = Pipeline(
         [
diff --git a/soccer_xg/ml/pipeline.py b/soccer_xg/ml/pipeline.py
index 4672869..dd44eab 100644
--- a/soccer_xg/ml/pipeline.py
+++ b/soccer_xg/ml/pipeline.py
@@ -306,3 +306,24 @@ def fit(self, X, y=None):
     def transform(self, X):
         row = self.rowle.transform(X[self.rowname])
         return self.embedding[row, :]
+
+
+class InteractionFeature(BaseEstimator, TransformerMixin):
+    """Append the product of the given columns as a new column of a copy of X."""
+
+    def __init__(self, columns_to_multiply, new_column_name=None):
+        self.columns_to_multiply = columns_to_multiply
+        if new_column_name is None:
+            new_column_name = "_x_".join(columns_to_multiply)  # default name
+        self.new_column_name = new_column_name
+
+    def fit(self, X, y=None):
+        return self
+
+    def transform(self, X):
+        result = X.copy()
+        factors = self.columns_to_multiply
+        result[self.new_column_name] = result[factors[0]]
+        for name in factors[1:]:
+            result[self.new_column_name] *= result[name]
+        return result
diff --git a/soccer_xg/ml/preprocessing.py b/soccer_xg/ml/preprocessing.py
index 319caf9..245fec5 100644
--- a/soccer_xg/ml/preprocessing.py
+++ b/soccer_xg/ml/preprocessing.py
@@ -19,7 +19,6 @@ def simple_proc_for_tree_algoritms(numeric_features, categoric_features):
     numpipe = make_pipeline(
         ColumnsSelector(numeric_features),
         SimpleImputer(strategy='mean'),
-        StandardScaler(),
     )
     if numeric_features and categoric_features:
         return make_union(catpipe, numpipe)
@@ -45,7 +44,7 @@ def simple_proc_for_linear_algoritms(numeric_features, categoric_features):
     numpipe = make_pipeline(
         ColumnsSelector(numeric_features),
         SimpleImputer(strategy='mean'),
-        StandardScaler(),
+        # StandardScaler(),
     )
     if numeric_features and categoric_features:
         return make_union(catpipe, numpipe)
diff --git a/soccer_xg/ml/xgboost.py b/soccer_xg/ml/xgboost.py
index 4b4fb5a..ea330f1 100644
--- a/soccer_xg/ml/xgboost.py
+++ b/soccer_xg/ml/xgboost.py
@@ -13,63 +13,17 @@ def xgboost_gridsearch_classifier(
     n_iter=100,
     scoring='roc_auc',
 ):
-    """
-    Simple classification pipeline using hyperband to optimize xgboost hyper-parameters
-    Parameters
-    ----------
-    `numeric_features` : The list of numeric features
-    `categoric_features` : The list of categoric features
-    `learning_rate` : The learning rate
-    """
+    """Simple classification pipeline using random search to optimize xgboost hyper-parameters.
 
-    return _xgboost_gridsearch_model(
-        'classification',
-        numeric_features,
-        categoric_features,
-        learning_rate,
-        use_dask,
-        n_iter,
-        scoring,
-    )
-
-
-def xgboost_gridsearch_regressor(
-    numeric_features,
-    categoric_features,
-    learning_rate=0.08,
-    use_dask=False,
-    n_iter=100,
-    scoring='roc_auc',
-):
-    """
-    Simple regression pipeline using hyperband to optimize xgboost hyper-parameters
     Parameters
     ----------
     `numeric_features` : The list of numeric features
     `categoric_features` : The list of categoric features
     `learning_rate` : The learning rate
+    `use_dask` : Whether to use dask or not
+    `n_iter` : The number of iterations for the random search
+    `scoring` : The scoring function to use
     """
-
-    return _xgboost_gridsearch_model(
-        'regression',
-        numeric_features,
-        categoric_features,
-        learning_rate,
-        use_dask,
-        n_iter,
-        scoring,
-    )
-
-
-def _xgboost_gridsearch_model(
-    task,
-    numeric_features,
-    categoric_features,
-    learning_rate,
-    use_dask,
-    n_iter,
-    scoring,
-):
     param_space = {
         'clf__max_depth': randint(2, 11),
         'clf__min_child_weight': randint(1, 11),
@@ -83,11 +37,7 @@ def _xgboost_gridsearch_model(
         'clf__scale_pos_weight': uniform(0.1, 9.9),
     }
 
-    model = (
-        xgbsk.XGBClassifier(learning_rate=learning_rate)
-        if task == 'classification'
-        else xgbsk.XGBRegressor(learning_rate=learning_rate)
-    )
+    model = xgbsk.XGBClassifier(learning_rate=learning_rate)
 
     pipe = Pipeline(
         [
diff --git a/soccer_xg/utils.py b/soccer_xg/utils.py
index 69190cf..0b7c3b2 100644
--- a/soccer_xg/utils.py
+++ b/soccer_xg/utils.py
@@ -1,8 +1,11 @@
+"""Utility functions."""
 import math
 
 import pandas as pd
 from fuzzywuzzy import fuzz
 
+from .data import Dataset
+
 
 def match_name(name, list_names, min_score=0):
     # -1 score incase we don't get any matches
@@ -31,9 +34,7 @@ def map_names(
 ):
     # List for dicts for easy dataframe creation
     dict_list = []
-    for _, (id, name) in df1[
-        [df1_output_colname, df1_match_colname]
-    ].iterrows():
+    for _, (id, name) in df1[[df1_output_colname, df1_match_colname]].iterrows():
         # Use our method to find best match, we can set a threshold here
         match = match_name(name, df2[df2_match_colname], threshold)
         # New dict for storing data
@@ -43,11 +44,7 @@ def map_names(
         if match[1] > threshold:
             dict_.update({'df2_name': match[0]})
             dict_.update(
-                {
-                    'df2_id': df2.loc[
-                        df2[df2_match_colname] == match[0], df2_output_colname
-                    ].iloc[0]
-                }
+                {'df2_id': df2.loc[df2[df2_match_colname] == match[0], df2_output_colname].iloc[0]}
             )
         else:
             dict_.update({'df2_name': 'unknown'})
@@ -133,25 +130,21 @@ def get_matching_shot(
     # Get shots that happened around the same time
     ts = shot.time_seconds
     best_match = other_shots_by_player_in_period.iloc[
-        (other_shots_by_player_in_period['time_seconds'] - ts)
-        .abs()
-        .argsort()[:1]
+        (other_shots_by_player_in_period['time_seconds'] - ts).abs().argsort()[:1]
     ].iloc[0]
     if abs(ts - best_match.time_seconds) < 3:
         return best_match
     return None
 
 
-def sample_temporal(dataset, size_val=0.0, size_test=0.2):
+def sample_temporal(
+    dataset: Dataset, size_val: float = 0.0, size_test: float = 0.2
+) -> tuple[list[int], list[int], list[int]]:
     game_ids = dataset.games().sort_values(by='game_date').index.values
     nb_games = len(game_ids)
-    games_train = game_ids[
-        0 : math.floor((1 - size_val - size_test) * nb_games)
-    ]
+    games_train = game_ids[0 : math.floor((1 - size_val - size_test) * nb_games)]
     games_val = game_ids[
-        math.ceil((1 - size_val - size_test) * nb_games) : math.floor(
-            (1 - size_test) * nb_games
-        )
+        math.ceil((1 - size_val - size_test) * nb_games) : math.floor((1 - size_test) * nb_games)
     ]
     games_test = game_ids[math.ceil((1 - size_test) * nb_games) + 1 : -1]
     return games_train, games_val, games_test
diff --git a/soccer_xg/visualisation.py b/soccer_xg/visualisation.py
index c048992..90094a5 100644
--- a/soccer_xg/visualisation.py
+++ b/soccer_xg/visualisation.py
@@ -1,138 +1,444 @@
 import matplotlib.pyplot as plt
-import matplotsoccer as mps
 import numpy as np
 import numpy.ma as ma
 from matplotlib.ticker import MultipleLocator
 from sklearn.metrics import auc, roc_curve
+from sklearn.preprocessing import label_binarize
+from statsmodels.stats.proportion import proportion_confint
+import matplotlib.ticker as mticker
+from matplotlib.ticker import MaxNLocator
+from matplotlib import gridspec
+from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
+
 
 from soccer_xg import metrics
 
 
-def plot_calibration_curve(
-    y_true,
-    y_pred,
-    name='Calibration curve',
+def plot_reliability_diagram(
+    labels,
+    scores,
+    legend=None,
+    show_histogram=True,
+    bins=10,
+    bin_strategy="uniform",
+    bayesian=False,
     min_samples=None,
-    axis=None,
-    **kwargs,
+    fig=None,
+    show_counts=False,
+    ci=None,
+    shaded_ci=False,
+    interval_method='beta',
+    fmt='s-',
+    show_correction=False,
+    show_gaps=False,
+    sample_proportion=0,
+    color_list=None,
+    show_bars=False,
+    invert_histogram=False,
+    overlay_histogram=False,
+    color_gaps='lightcoral',
+    ax=None,
 ):
-    """Plot the validation data.
+    """Plot the reliability diagram of the given scores and true labels.
 
     Parameters
     ----------
-    axis : matplotlib.pyplot.axis object or ``None`` (default=``None``)
-        If provided, the validation line will be overlaid on ``axis``.
-        Otherwise, a new figure and axis will be generated and plotted on.
-    **kwargs
-        Arguments to ``axis.plot``.
+    labels : array (n_samples, )
+        Labels indicating the true class.
+    scores : array (n_samples,) or list of matrices
+        Output probability scores for one or several methods.
+    legend : list of strings or None
+        Text to use for the legend.
+    show_histogram : boolean
+        If True, it generates an additional figure showing the number of
+        samples in each bin.
+    bins : int or list of floats
+        Number of bins to create in the scores' space, or list of bin
+        boundaries.
+    bin_strategy : {'uniform', 'quantile'}, default='uniform'
+        Strategy used to define the widths of the bins.
+
+        uniform
+            The bins have identical widths.
+        quantile
+            The bins have the same number of samples and depend on `scores`.
+    bayesian : bool, default=False
+        Compute true and predicted probabilities for a calibration curve using
+        kernel density estimation instead of bins with a fixed width.
+    min_samples : int or None
+        Hide bins with fewer than `min_samples` samples.
+    fig : matplotlib.pyplot.Figure or None
+        Figure to use for the plots, if None a new figure is created.
+    show_counts : boolean
+        If True shows the number of samples of each bin in its corresponding
+        line marker.
+    ci : float or None
+        If a float between 0 and 1 is passed, it shows an errorbar
+        corresponding to a confidence interval containing the specified
+        percentile of the data.
+    shaded_ci : boolean
+        If True, the confidence interval is shown as a shaded area instead of
+        error bars.
+    interval_method : string (default: 'beta')
+        Method to estimate the confidence interval which uses the function
+        proportion_confint from statsmodels.stats.proportion
+    fmt : string (default: 's-')
+        Format of the lines following the matplotlib.pyplot.plot standard.
+    show_correction : boolean
+        If True shows an arrow for each bin indicating the necessary correction
+        to the average scores in order to be perfectly calibrated.
+    show_gaps : boolean
+        If True shows the gap between the average predictions and the true
+        proportion of positive samples.
+    sample_proportion : float in the interval [0, 1] (default 0)
+        If bigger than 0, it shows the labels of the specified proportion of
+        samples.
+    color_list : list of strings or None
+        List of string colors indicating the color of each method.
+    show_bars : boolean
+        If True shows bars instead of lines.
+    invert_histogram : boolean
+        If True shows the histogram with the zero on top and highest number of
+        bin samples at the bottom.
+    overlay_histogram : boolean
+        If True, shows the histogram on the same plot as the reliability diagram.
+    color_gaps : string
+        Color of the gaps (if shown).
 
     Returns
     -------
-    matplotlib.pylot.axis
-        The axis the plot was made on.
-
-    Raises
-    ------
-    NotFittedError
-        If the model hasn't been fit **and** validated.
+    fig : matplotlib.pyplot.figure
+        Figure with the reliability diagram
     """
+    if isinstance(scores, list):
+        scores_list = scores
+    else:
+        scores_list = [
+            scores,
+        ]
+    n_scores = len(scores_list)
+    if color_list is None:
+        color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
 
-    if axis is None:
-        axis = plt.figure(figsize=(5, 5)).add_subplot(111)
-
-    axis.set_title(name)
-    axis.plot([0, 100], [0, 100], ls='--', lw=1, color='grey')
-    axis.set_xlabel('Predicted probability')
-    axis.set_ylabel('True probability in each bin')
-    axis.set_xlim((0, 100))
-    axis.xaxis.set_major_locator(MultipleLocator(20))
-    axis.xaxis.set_minor_locator(MultipleLocator(10))
-    axis.set_ylim((0, 100))
-    axis.yaxis.set_major_locator(MultipleLocator(20))
-    axis.yaxis.set_minor_locator(MultipleLocator(10))
-    # axis.set_aspect(1)
-    axis.grid(which='both')
-
-    (
-        sample_probabilities,
-        predicted_pos_percents,
-        num_plays_used,
-    ) = metrics.bayesian_calibration_curve(y_true, y_pred)
-
-    if min_samples is not None:
-        axis.plot(
-            sample_probabilities,
-            predicted_pos_percents,
-            c='c',
-            alpha=0.3,
-            **kwargs,
+    classes = np.unique(labels)
+    n_classes = len(classes)
+    if n_classes != 2:
+        raise ValueError(
+            'Only binary classification is supported. Provided labels %s.' % labels
         )
-        sample_probabilities = ma.array(sample_probabilities)
-        sample_probabilities[num_plays_used < min_samples] = ma.masked
-        predicted_pos_percents = ma.array(predicted_pos_percents)
-        predicted_pos_percents[num_plays_used < min_samples] = ma.masked
-
-    max_deviation = metrics.max_deviation(sample_probabilities, predicted_pos_percents)
-    residual_area = metrics.residual_area(sample_probabilities, predicted_pos_percents)
-
-    axis.plot(
-        sample_probabilities,
-        predicted_pos_percents,
-        c='c',
-        label='Calibration curve\n(area = %0.2f, max dev = %0.2f)'
-        % (residual_area, max_deviation),
-        **kwargs,
-    )
-
-    axis.legend(loc='lower right')
-
-    ax2 = axis.twinx()
-    ax2.hist(
-        y_pred * 100,
-        bins=np.arange(0, 101, 1),
-        density=True,
-        alpha=0.4,
-        facecolor='grey',
-    )
-    ax2.set_ylim([0, 0.2])
-    ax2.set_yticks([0, 0.1, 0.2])
-
-    plt.tight_layout()
-    return axis
+    labels = label_binarize(labels, classes=classes)[:, 0]
 
+    labels_list = []
 
-def plot_roc_curve(y_true, y_prob, name='Calibration curve', axis=None):
-    fpr, tpr, _ = roc_curve(y_true, y_prob)
-    roc_auc = auc(fpr, tpr)
+    if fig is None:
+        fig = plt.figure(figsize=(4, 4))
+
+    if show_histogram:
+        spec = gridspec.GridSpec(
+            ncols=1, nrows=2, height_ratios=[5, 1], wspace=0.02, hspace=0.04, left=0.15
+        )
+    else:
+        spec = gridspec.GridSpec(ncols=1, nrows=1, hspace=0.04, left=0.15)
+
+    if isinstance(bins, int):
+        n_bins = bins
+        if bin_strategy == "quantile":  # Determine bin edges by distribution of data
+            quantiles = np.linspace(0, 1, n_bins + 1)
+            bins = np.percentile(scores_list[0], quantiles * 100)
+            bins[0] = 0 - 1e-8
+            bins[-1] = 1 + 1e-8
+        elif bin_strategy == "uniform":
+            bins = np.linspace(0, 1 + 1e-8, n_bins + 1)
+        else:
+            raise ValueError(
+                "Invalid entry to 'strategy' input. Strategy "
+                "must be either 'quantile' or 'uniform'."
+            )
+    elif isinstance(bins, list) or isinstance(bins, np.ndarray):
+        n_bins = len(bins) - 1
+        bins = np.array(bins)
+        if bins[0] == 0.0:
+            bins[0] = 0 - 1e-8
+        if bins[-1] == 1.0:
+            bins[-1] = 1 + 1e-8
+    else:
+        raise ValueError(
+            "Invalid entry to 'bins' input. The must be either "
+            "a list of bin boundaries or the number of bins."
+        )
+
+    if ax is not None:
+        ax1 = ax
+    else:
+        ax1 = fig.add_subplot(spec[0])
+    # Perfect calibration
+    ax1.plot([0, 1], [0, 1], "--", color='lightgrey', zorder=10)
+    for j, score in enumerate(scores_list):
+        if labels_list:
+            labels = labels_list[j]
+
+        if bayesian:
+            avg_true, avg_pred, bin_true, bin_total = metrics.bayesian_calibration_curve(labels, score)
+            bins = np.linspace(0.01, 0.99, 99)
+        else:
+            avg_true, avg_pred, bin_true, bin_total = metrics.calibration_curve(labels, score, bins=bins)
+
+        zero_idx = bin_total == 0
+
+        if min_samples is not None:
+            avg_true = ma.array(avg_true)
+            avg_true[bin_total < min_samples] = ma.masked
+            avg_pred = ma.array(avg_pred)
+            avg_pred[bin_total < min_samples] = ma.masked
+
+        name = legend[j] if legend else None
+        if show_bars:
+            ax1.bar(
+                x=bins[:-1][~zero_idx],
+                height=avg_true[~zero_idx],
+                align='edge',
+                width=(bins[1:] - bins[:-1])[~zero_idx],
+                edgecolor='black',
+                color=color_list[j],
+            )
+        else:
+            if ci is None:
+                ax1.plot(avg_pred, avg_true, fmt, label=name, color=color_list[j])
+            else:
+                nozero_intervals = proportion_confint(
+                    count=bin_true[~zero_idx],
+                    nobs=bin_total[~zero_idx],
+                    alpha=1 - ci,
+                    method=interval_method,
+                )
+                nozero_intervals = np.array(nozero_intervals)
+
+                intervals = np.empty((2, bin_total.shape[0]))
+                intervals.fill(np.nan)
+                intervals[:, ~zero_idx] = nozero_intervals
+
+                yerr = np.abs(intervals - avg_true)
+                if shaded_ci:
+                    ax1.fill_between(avg_pred, avg_true-yerr[0], avg_true+yerr[1], color=color_list[j], alpha=0.2)
+                    ax1.plot(avg_pred, avg_true, fmt, label=name, color=color_list[j])
+                else:
+                    ax1.errorbar(
+                        avg_pred, avg_true, yerr=yerr, label=name, fmt=fmt, color=color_list[j]
+                    )  # markersize=5)
+
+        if show_counts:
+            for ap, at, count in zip(avg_pred, avg_true, bin_total):
+                if np.isfinite(ap) and np.isfinite(at):
+                    ax1.text(
+                        ap,
+                        at,
+                        str(count),
+                        fontsize=6,
+                        ha='center',
+                        va='center',
+                        zorder=11,
+                        bbox=dict(boxstyle='square,pad=0.3', fc='white', ec=color_list[j]),
+                    )
+
+        if show_correction:
+            for ap, at in zip(avg_pred, avg_true):
+                ax1.arrow(
+                    ap,
+                    at,
+                    at - ap,
+                    0,
+                    color=color_gaps,
+                    head_width=0.02,
+                    length_includes_head=True,
+                    width=0.01,
+                )
+
+        if show_gaps:
+            for ap, at in zip(avg_pred, avg_true):
+                error = avg_pred - avg_true
+                negative_values = error < 0
+                ygaps = np.zeros(shape=(2, avg_true.shape[0]))
+                ygaps[0, negative_values] = -error[negative_values]
+                ygaps[1, ~negative_values] = error[~negative_values]
+                ax1.errorbar(
+                    avg_pred,
+                    avg_true,
+                    yerr=ygaps,
+                    fmt=" ",
+                    color=color_gaps,
+                    lw=4,
+                    capsize=5,
+                    capthick=1,
+                    zorder=10,
+                )
+
+        if sample_proportion > 0:
+            idx = np.random.choice(labels.shape[0], int(sample_proportion * labels.shape[0]))
+            ax1.scatter(
+                score[idx],
+                labels[idx],
+                marker='|',
+                s=100,
+                alpha=0.2,
+                color=color_list[j],
+            )
+
+        ax1.set_xlim((0, 1))
+        ax1.xaxis.set_major_locator(MultipleLocator(.20))
+        ax1.xaxis.set_minor_locator(MultipleLocator(.10))
+        ax1.set_ylim((0, 1))
+        ax1.yaxis.set_major_locator(MultipleLocator(.20))
+        ax1.yaxis.set_minor_locator(MultipleLocator(.10))
+        if not show_histogram or overlay_histogram:
+            ax1.set_xlabel('Average score')
+        elif show_histogram:
+            ax1.set_xticklabels([])
+        ax1.set_ylabel('Fraction of positives')
+        ax1.grid(which='both')
+        # ax1.set_aspect(1)
+        ax1.set_axisbelow(True)
+
+        if show_histogram:
+
+            if overlay_histogram:
+                ax2 = ax1.twinx()
+            else:
+                divider = make_axes_locatable(ax1)
+                ax2 = divider.append_axes("bottom", size="20%", pad=0.1, sharex=ax1)
 
-    if axis is None:
-        axis = plt.figure(figsize=(5, 5)).add_subplot(111)
 
-    axis.plot(fpr, tpr, linewidth=1, label='ROC curve (area = %0.2f)' % roc_auc)
+            # ax2 = fig.add_subplot(spec[1], label='{}'.format(i))
+            for j, score in enumerate(scores_list):
+                # lines = ax1.get_lines()
+                # ax2.set_xticklabels([])
+
+                name = legend[j] if legend else None
+                if n_scores > 1:
+                    kwargs = {'histtype': 'step', 'edgecolor': color_list[j]}
+                else:
+                    kwargs = {'histtype': 'bar', 'edgecolor': 'black', 'color': color_list[j]}
+                if overlay_histogram:
+                    kwargs = {**kwargs, 'alpha': 0.4 }
+
+                ax2.hist(score, range=(0, 1), bins=bins, label=name, lw=1, **kwargs)
+                ax2.set_xlim((0, 1))
+                ax2.set_xlabel('Average score')
+                ax2.yaxis.set_major_locator(MaxNLocator(integer=True, prune='upper', nbins=3))
+            ax2.set_ylabel('Count')
+            if not overlay_histogram:
+                ytickloc = ax2.get_yticks()
+                ax2.yaxis.set_major_locator(mticker.FixedLocator(ytickloc))
+                yticklabels = ['{:0.0f}'.format(value) for value in ytickloc]
+                ax2.set_yticklabels(labels=yticklabels, fontdict=dict(verticalalignment='top'))
+                ax2.grid(True, which='both')
+                ax2.set_axisbelow(True)
+            if invert_histogram:
+                ylim = ax2.get_ylim()
+                ax2.set_ylim(reversed(ylim))
+
+    if legend is not None:
+        lines, labels = fig.axes[0].get_legend_handles_labels()
+        fig.legend(
+            lines,
+            labels,
+            loc='upper center',
+            bbox_to_anchor=(0, 0, 1, 1),
+            bbox_transform=fig.transFigure,
+            ncol=6,
+        )
+
+    fig.align_labels()
+    return fig
+
+
+def plot_roc_curve(
+    labels,
+    scores,
+    legend=None,
+    color_list=None,
+    fmt='-',
+    fig=None,
+    ax=None
+):
+    """Plot the ROC curve of the given scores and true labels.
+
+    Parameters
+    ----------
+    labels : array (n_samples, )
+        Labels indicating the true class.
+    scores : array (n_samples,) or list of arrays
+        Output probability scores for one or several methods.
+    legend : list of strings or None
+        Text to use for the legend.
+    color_list : list of strings or None
+        List of string colors indicating the color of each method.
+    fmt : string (default: '-')
+        Format of the lines following the matplotlib.pyplot.plot standard.
+    fig : matplotlib.pyplot.Figure or None
+        Figure to use for the plots, if None a new figure is created.
+
+    Returns
+    -------
+    fig : matplotlib.pyplot.figure
+        Figure with the ROC curve
+    """
+    if isinstance(scores, list):
+        scores_list = scores
+    else:
+        scores_list = [
+            scores,
+        ]
+
+    if color_list is None:
+        color_list = plt.rcParams['axes.prop_cycle'].by_key()['color']
+
+    if fig is None:
+        fig = plt.figure(figsize=(4, 4))
+    spec = gridspec.GridSpec(ncols=1, nrows=1, hspace=0.04, left=0.15)
+
+    if ax is None:
+        ax = fig.add_subplot(spec[0])
+
+    for j, score in enumerate(scores_list):
+        fpr, tpr, _ = roc_curve(labels, score)
+        roc_auc = auc(fpr, tpr)
+        name = f"{legend[j]} (AUC = {roc_auc:.2f})" if legend else None
+
+        ax.plot(fpr, tpr, fmt, linewidth=1, label=name, color=color_list[j])
 
     # reference line, legends, and axis labels
-    axis.plot([0, 1], [0, 1], linestyle='--', color='gray')
-    axis.set_title('ROC curve')
-    axis.set_xlabel('False Positive Rate')
-    axis.set_ylabel('True Positive Rate')
-    axis.set_xlim(0, 1)
-    axis.xaxis.set_major_locator(MultipleLocator(0.20))
-    axis.xaxis.set_minor_locator(MultipleLocator(0.10))
-    axis.set_ylim(0, 1)
-    axis.yaxis.set_major_locator(MultipleLocator(0.20))
-    axis.yaxis.set_minor_locator(MultipleLocator(0.10))
-    axis.grid(which='both')
-
-    # sns.despine()
+    ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
+    ax.set_xlabel('False Positive Rate')
+    ax.set_ylabel('True Positive Rate')
+    ax.set_xlim(0, 1)
+    ax.xaxis.set_major_locator(MultipleLocator(0.20))
+    ax.xaxis.set_minor_locator(MultipleLocator(0.10))
+    ax.set_ylim(0, 1)
+    ax.yaxis.set_major_locator(MultipleLocator(0.20))
+    ax.yaxis.set_minor_locator(MultipleLocator(0.10))
+    ax.grid(which='both')
+
     # plt.gca().xaxis.set_ticks_position('none')
     # plt.gca().yaxis.set_ticks_position('none')
-    plt.gca().legend()
 
-    axis.legend(loc='lower right')
-    plt.tight_layout()
+    if legend is not None:
+        lines, labels = fig.axes[0].get_legend_handles_labels()
+        fig.legend(
+            lines,
+            labels,
+            loc='lower right',
+            bbox_to_anchor=(0, 0, 1, 1),
+            bbox_transform=fig.transFigure,
+            ncol=6,
+        )
+
+    fig.align_labels()
+    return fig
 
 
 def plot_heatmap(model, data, axis=None):
+    import matplotsoccer as mps
+
     if axis is None:
         axis = plt.figure(figsize=(8, 10)).add_subplot(111)
 
diff --git a/soccer_xg/xg.py b/soccer_xg/xg.py
index 5de3d77..4f6d26e 100644
--- a/soccer_xg/xg.py
+++ b/soccer_xg/xg.py
@@ -2,14 +2,14 @@
 from __future__ import annotations
 
 import warnings
-from typing import Tuple, List, Optional
 from pathlib import Path
+from typing import Callable, Literal
 
 import joblib
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import brier_score_loss, roc_auc_score
-from sklearn.pipeline import make_pipeline
+from sklearn.pipeline import Pipeline, make_pipeline
 from sklearn.utils.validation import NotFittedError
 from tqdm.auto import tqdm
 
@@ -17,6 +17,7 @@
 from soccer_xg import metrics
 from soccer_xg.data.base import Dataset
 from soccer_xg.ml.preprocessing import simple_proc_for_linear_algoritms
+from soccer_xg.ml.pipeline import InteractionFeature
 
 
 class XGModel:
@@ -24,22 +25,17 @@ class XGModel:
 
     Parameters
     ----------
-    copy_data : boolean (default=``True``)
-        Whether or not to copy data when fitting and applying the model.
-        Running the model in-place (``copy_data=False``) will be faster and
-        have a smaller memory footprint, but if not done carefully can lead to
-        data integrity issues.
-
-    Attributes
-    ----------
-    model : A Scikit-learn pipeline (or equivalent)
-        The actual model used to compute xG. Upon initialization it will be
-        set to a default model, but can be overridden by the user.
+    dataset_transformer : DatasetTransformer
+        A dataset transformer to convert a dataset to features.
+    pipeline : Pipeline
+        Scikit-Learn pipeline to use for the model.
     column_descriptions : dictionary
         A dictionary whose keys are the names of the columns used in the
         model, and the values are string descriptions of what the columns
-        mean. Set at initialization to be the default model, if you create
-        your own model you'll need to update this attribute manually.
+        mean.
+
+    Attributes
+    ----------
     training_seasons : A list of tuples, or ``None`` (default=``None``)
         If the model was trained using data from a Dataset, a list of
         (competition_id, season_id) tuples used to train the model. If no
@@ -63,11 +59,14 @@ class XGModel:
     model_directory = Path(__file__).resolve().parent / "models"
     _default_model_filename = "default_model.xg"
 
-    def __init__(self, copy_data: bool = True):
-        self.copy_data = copy_data
-        self.column_descriptions = None
-
-        self.model = self.create_default_pipeline()
+    def __init__(
+        self,
+        dataset_transformer: DatasetTransformer,
+        pipeline: Pipeline,
+        column_descriptions: dict[str, str] | None = None,
+    ):
+        self.dataset_transformer = dataset_transformer
+        self.pipeline = pipeline
         self._fitted = False
         self._training_seasons = None
         self._validation_seasons = None
@@ -77,27 +76,43 @@ def __init__(self, copy_data: bool = True):
         self._num_shots_used = None
 
     @property
-    def training_seasons(self) -> Optional[List[Tuple[str, str]]]:
+    def training_seasons(self) -> list[tuple[str, str]] | None:
         return self._training_seasons
 
     @property
-    def validation_seasons(self):
+    def validation_seasons(self) -> list[tuple[str, str]] | None:
         return self._validation_seasons
 
     @property
-    def sample_probabilities(self):
+    def sample_probabilities(self) -> list[float] | None:
         return self._sample_probabilities
 
     @property
-    def predicted_goal_percents(self):
+    def predicted_goal_percents(self) -> list[float] | None:
         return self._predicted_goal_percents
 
     @property
-    def num_shots_used(self):
+    def num_shots_used(self) -> int | None:
         return self._num_shots_used
 
     @classmethod
-    def filter_shots(cls, df_actions):
+    def filter_shots(cls, df_actions) -> pd.Series:
+        """Return a boolean mask indicating which shots to handle.
+
+        This method is used to filter out shots that should not be used
+        for training, validation, or prediction. By default, it filters
+        out own-goals only.
+
+        Parameters
+        ----------
+        df_actions : pd.DataFrame
+            A dataframe containing the SPADL actions.
+
+        Returns
+        -------
+        pd.Series
+            A boolean mask indicating which shots to handle.
+        """
         shot_mask = df_actions.type_name.isin(
             ["shot", "shot_penalty", "shot_freekick"]
         ) & df_actions.result_name.isin(["fail", "success"])
@@ -106,30 +121,34 @@ def filter_shots(cls, df_actions):
     def train(
         self,
         source_data: Dataset | pd.DataFrame,
-        training_seasons: List[Tuple[str, str]] = (("ENG", "1617"), ("ENG", "1718")),
         target_colname: str = "goal",
+        training_seasons: list[tuple[str, str]] | None = None,
     ):
         """Train the model.
 
         Once a modeling pipeline is set up (either the default or something
         custom-generated), historical data needs to be fed into it in order to
         "fit" the model so that it can then be used to predict future results.
-        This method implements a simple wrapper around the core Scikit-learn functionality
-        which does this.
+        This method implements a simple wrapper around the core Scikit-learn
+        functionality which does this.
 
-        The default is to use data from a Dataset object, however that can be changed
-        to a simple Pandas DataFrame with precomputed features and labels if desired.
+        The default is to use data from a Dataset object, however that can be
+        changed to a simple Pandas DataFrame with precomputed features and
+        labels if desired.
 
-        There is no particular output from this function, rather the parameters governing
-        the fit of the model are saved inside the model object itself. If you want to get an
-        estimate of the quality of the fit, use the ``validate_model`` method after running
-        this method.
+        There is no particular output from this function, rather the
+        parameters governing the fit of the model are saved inside the model
+        object itself. If you want to get an estimate of the quality of the
+        fit, use the ``validate_model`` method after running this method.
 
         Parameters
         ----------
         source_data : ``Dataset`` or a Pandas DataFrame
             The data to be used to train the model. If an instance of
-            ``Dataset`` is given, will query the api database for the training data.
+            ``Dataset`` is given, will query the database for the training data.
+        target_colname : string or integer (default=``"goal"``)
+            The name of the target variable column. This is only relevant if
+            ``source_data`` is not a ``Dataset``.
         training_seasons : list of tuples (default=``[('ENG', '1617'), ('ENG', '1718')]``)
             What seasons to use to train the model if getting data from a Dataset instance.
             If ``source_data`` is not a ``Dataset``, this argument will be ignored.
@@ -137,40 +156,35 @@ def train(
             model - some will need to be reserved for a final validation (see the
             ``validate_model`` method). A good dataset to reserve
             for validation is the most recent one or two seasons.
-        target_colname : string or integer (default=``"goal"``)
-            The name of the target variable column. This is only relevant if
-            ``source_data`` is not a ``Dataset``.
 
         Returns
         -------
         ``None``
         """
-        if isinstance(self.model, list):
-            for model in self.model:
-                model.train(source_data, training_seasons, target_colname)
+        self._training_seasons = []
+        if isinstance(source_data, Dataset):
+            game_ids = pd.concat(
+                source_data.games(competition_id=s[0], season_id=s[1]) for s in training_seasons
+            ).index.tolist()
+            feature_cols, target_col = self.dataset_transformer.transform(
+                dataset=source_data,
+                game_ids=game_ids,
+            )
+            self._training_seasons = training_seasons
         else:
-            self._training_seasons = []
-            if isinstance(source_data, Dataset):
-                game_ids = pd.concat(
-                    source_data.games(competition_id=s[0], season_id=s[1])
-                    for s in training_seasons
-                ).index
-                feature_cols, target_col = prepare(
-                    source_data, game_ids, shotfilter=self.filter_shots
-                )
-                self._training_seasons = training_seasons
-            else:
-                target_col = source_data[target_colname]
-                feature_cols = source_data.drop(target_colname, axis=1)
-            self.model.fit(feature_cols, target_col)
+            target_col = source_data[target_colname]
+            feature_cols = source_data.drop(target_colname, axis=1)
+        self.pipeline.fit(feature_cols, target_col.squeeze())
         self._fitted = True
 
     def validate(
         self,
-        source_data,
-        validation_seasons=(("ENG", "1819")),
-        target_colname="goal",
-        plot=True,
+        source_data: Dataset | pd.DataFrame,
+        target_colname: str = "goal",
+        validation_seasons: list[tuple[str, str]] | None = None,
+        n_bins=10,
+        bin_strategy="quantile",
+        plot: bool = True,
     ):
         """Validate the model.
 
@@ -202,6 +216,15 @@ def validate(
         target_colname : string or integer (default=``"goal"``)
             The name of the target variable column. This is only relevant if
             ``source_data`` is not a ``Dataset``.
+        n_bins : int, default=10
+            Number of bins to discretize the [0, 1] interval. A bigger number
+            requires more data.
+        bin_strategy : {'uniform', 'quantile'}, default='quantile'
+            Strategy used to define the widths of the bins.
+            uniform
+                The bins have identical widths.
+            quantile
+                The bins have the same number of samples and depend on the predicted probabilities.
         plot: bool (default=true)
             Whether to plot the AUROC and probability calibration curves.
 
@@ -222,23 +245,26 @@ def validate(
         if isinstance(source_data, Dataset):
             game_ids = pd.concat(
                 source_data.games(competition_id=s[0], season_id=s[1]) for s in validation_seasons
-            ).index
-            _, target_col = prepare(source_data, game_ids)
+            ).index.tolist()
+            feature_cols, target_col = self.dataset_transformer.transform(
+                dataset=source_data, game_ids=game_ids
+            )
             self._validation_seasons = validation_seasons
         else:
             game_ids = None
+            feature_cols = source_data.drop(target_colname, axis=1)
             target_col = source_data[target_colname]
             self._validation_seasons = []
 
-        df_predictions = self.estimate(source_data, game_ids)
-        predicted_probabilities = df_predictions["xG"]
-        target_col = target_col.loc[df_predictions.index]
+        predicted_probabilities = self.estimate(feature_cols)["xG"]
+        target_col = target_col.squeeze()
 
         (
-            self._sample_probabilities,
             self._predicted_goal_percents,
+            self._sample_probabilities,
+            _,
             self._num_shots_used,
-        ) = metrics.bayesian_calibration_curve(target_col.values, predicted_probabilities)
+        ) = metrics.calibration_curve(target_col, predicted_probabilities, n_bins, bin_strategy)
 
         # Compute the maximal deviation from a perfect prediction as well as the area under the
         # curve of the residual between |predicted - perfect|:
@@ -251,28 +277,45 @@ def validate(
         roc = roc_auc_score(target_col, predicted_probabilities)
         brier = brier_score_loss(target_col, predicted_probabilities)
         ece = metrics.expected_calibration_error(
-            target_col, predicted_probabilities, 10, "uniform"
+            target_col, predicted_probabilities, n_bins, "uniform"
         )
         ace = metrics.expected_calibration_error(
-            target_col, predicted_probabilities, 10, "quantile"
+            target_col, predicted_probabilities, n_bins, "quantile"
         )
 
         if plot:
             import matplotlib.pyplot as plt
 
             from soccer_xg.visualisation import (
-                plot_calibration_curve,
+                plot_reliability_diagram,
                 plot_roc_curve,
             )
 
             fig, ax = plt.subplots(1, 2, figsize=(10, 5))
-            plot_roc_curve(target_col, predicted_probabilities, axis=ax[0])
-            plot_calibration_curve(
+            plot_roc_curve(target_col, predicted_probabilities, fig=fig, ax=ax[0])
+            plot_reliability_diagram(
                 target_col,
                 predicted_probabilities,
+                fig=fig,
+                ax=ax[1],
+                bayesian=False,
+                bins=n_bins,
+                bin_strategy=bin_strategy,
+                show_counts=False,
+                fmt='s-',
                 min_samples=100,
-                axis=ax[1],
+                show_histogram=False,
+                overlay_histogram=False,
+                invert_histogram=False,
+                ci=0.95,
+                shaded_ci=True,
+                show_gaps=False,
+                show_bars=False,
             )
+            ax[0].set_title("ROC curve")
+            ax[1].set_title("Reliability diagram")
+            plt.tight_layout()
+            plt.show()
 
         return {
             "max_dev": max_deviation,
@@ -281,10 +324,9 @@ def validate(
             "brier": brier,
             "ece": ece,
             "ace": ace,
-            "fig": fig if plot else None,
         }
 
-    def estimate(self, source_data, game_ids=None):
+    def estimate(self, source_data: Dataset | pd.DataFrame, game_ids: list[int] | None = None):
         """Estimate the xG values for all shots in a set of games.
 
         The default is to use data from a Dataset object, however that can be changed
@@ -315,32 +357,15 @@ def estimate(self, source_data, game_ids=None):
         if not self._fitted:
             raise NotFittedError("Must fit model before predicting WP.")
 
-        if isinstance(self.model, list):
-            xg = []
-            for model in self.model:
-                xg.append(model.estimate(source_data, game_ids))
-            return pd.concat(xg).sort_index()
-        else:
-            if isinstance(source_data, Dataset):
-                game_ids = source_data.games().index if game_ids is None else game_ids
-                source_data, _ = prepare(source_data, game_ids)
-
-            xg = pd.DataFrame(index=source_data.index)
-            xg["xG"] = self.model.predict_proba(source_data)[:, 1]
-            return xg
-
-    def create_default_pipeline(self):
-        """Create the default xG estimation pipeline.
+        if isinstance(source_data, Dataset):
+            game_ids = source_data.games().index.tolist() if game_ids is None else game_ids
+            source_data, _ = self.dataset_transformer.transform(
+                dataset=source_data, game_ids=game_ids
+            )
 
-        Returns
-        -------
-        Scikit-learn pipeline
-            The default pipeline, suitable for computing xG
-            but by no means the best possible model.
-        """
-        models = [OpenplayXGModel(), FreekickXGModel(), PenaltyXGModel()]
-        self.column_descriptions = {m.__class__.__name__: m.column_descriptions for m in models}
-        return models
+        xg = pd.DataFrame(index=source_data.index)
+        xg["xG"] = self.pipeline.predict_proba(source_data)[:, 1]
+        return xg
 
     def save_model(self, filename=None):
         """Save the XGModel instance to disk.
@@ -386,58 +411,29 @@ def load_model(cls, filename=None):
         return joblib.load(cls.model_directory / filename)
 
 
-class OpenplayXGModel(XGModel):
-    _default_model_filename = "default_openplay_model.xg"
-
-    def create_default_pipeline(self):
-        bodypart_colname = "bodypart_id_a0"
-        dist_to_goal_colname = "start_dist_to_goal_a0"
-        angle_to_goal_colname = "start_angle_to_goal_a0"
-
-        self.column_descriptions = {
-            bodypart_colname: "Bodypart used for the shot (head, foot or other)",
-            dist_to_goal_colname: "Distance to goal",
-            angle_to_goal_colname: "Angle to goal",
-        }
-
-        preprocess_pipeline = simple_proc_for_linear_algoritms(
-            [dist_to_goal_colname, angle_to_goal_colname], [bodypart_colname]
-        )
-        base_model = LogisticRegression(max_iter=10000, solver="lbfgs", fit_intercept=False)
-        pipe = make_pipeline(preprocess_pipeline, base_model)
-        return pipe
-
-    @classmethod
-    def filter_shots(cls, df_actions):
-        shot_idx = (df_actions.type_name == "shot") & df_actions.result_name.isin(
-            ["fail", "success"]
-        )
-        return shot_idx
-
-
 class PenaltyXGModel(XGModel):
     _default_model_filename = "default_penalty_model.xg"
 
-    def __init__(self, copy_data=True):
-        super().__init__(copy_data)
+    def __init__(self):
         self._fitted = True
+        self.dataset_transformer = DatasetTransformer(
+            xfns=[], shotfilter=lambda x: x.type_name == "shot_penalty"
+        )
+        self.pipeline = Pipeline([])
 
     def train(
         self,
-        source_data,
-        training_seasons=(("ENG", "1617"), ("ENG", "1718")),
-        target_colname="goal",
+        source_data: Dataset | pd.DataFrame,
+        target_colname: str = "goal",
+        training_seasons: list[tuple[str, str]] | None = None,
     ):
         pass
 
-    def estimate(self, source_data, game_ids=None):
+    def estimate(self, source_data: Dataset | pd.DataFrame, game_ids: list[int] | None = None):
         if isinstance(source_data, Dataset):
-            game_ids = source_data.games.index if game_ids is None else game_ids
-            source_data, _ = prepare(
-                source_data,
-                game_ids,
-                xfns=[],
-                shotfilter=PenaltyXGModel.filter_shots,
+            game_ids = source_data.games().index.tolist() if game_ids is None else game_ids
+            source_data, _ = self.dataset_transformer.transform(
+                dataset=source_data, game_ids=game_ids
             )
 
         xg = pd.DataFrame(index=source_data.index)
@@ -445,103 +441,286 @@ def estimate(self, source_data, game_ids=None):
 
         return xg
 
-    def create_default_pipeline(self):
-        return None
-
-    @classmethod
-    def filter_shots(cls, df_actions):
-        shot_idx = df_actions.type_name == "shot_penalty"
-        return shot_idx
-
 
 class FreekickXGModel(XGModel):
     _default_model_filename = "default_freekick_model.xg"
 
-    def create_default_pipeline(self):
-        dist_to_goal_colname = "start_dist_to_goal_a0"
-        angle_to_goal_colname = "start_angle_to_goal_a0"
+    def __init__(self):
+        self.dataset_transformer = DatasetTransformer(
+            xfns=[fs.shot_dist, fs.shot_visible_angle],
+            shotfilter=lambda x: x.type_name == "shot_freekick",
+        )
+        self.pipeline = self._build_pipeline()
+
+    def _build_pipeline(self) -> Pipeline:
+        dist_colname = "dist_shot"
+        angle_colname = "visible_angle_shot"
+        dist_x_angle_colname = "dist_x_visible_angle_shot"
 
         self.column_descriptions = {
-            dist_to_goal_colname: "Distance to goal",
-            angle_to_goal_colname: "Angle to goal",
+            dist_colname: "Distance to goal",
+            angle_colname: "Angle to goal",
+            dist_x_angle_colname: "Distance * angle to goal",
         }
 
+        feature_pipeline = InteractionFeature([dist_colname, angle_colname], dist_x_angle_colname)
         preprocess_pipeline = simple_proc_for_linear_algoritms(
-            [dist_to_goal_colname, angle_to_goal_colname], []
+            numeric_features=[dist_colname, angle_colname, dist_x_angle_colname],
+            categoric_features=[],
         )
         base_model = LogisticRegression(max_iter=10000, solver="lbfgs", fit_intercept=True)
-        pipe = make_pipeline(preprocess_pipeline, base_model)
-        return pipe
+        return make_pipeline(feature_pipeline, preprocess_pipeline, base_model)
+
+
+class BasicOpenplayXGModel(XGModel):
+    _default_model_filename = "default_openplay_model.xg"
+
+    def __init__(self):
+        self.dataset_transformer = DatasetTransformer(
+            xfns=[fs.shot_dist, fs.shot_visible_angle, fs.shot_bodypart],
+            shotfilter=lambda x: x.type_name == 'shot' and x.result_name in ["fail", "success"],
+        )
+        self.pipeline = self._build_pipeline()
+
+    def _build_pipeline(self) -> Pipeline:
+        bodypart_colname = "bodypart_name_shot"
+        dist_colname = "dist_shot"
+        angle_colname = "visible_angle_shot"
+        dist_x_angle_colname = "dist_x_visible_angle_shot"
+
+        self.column_descriptions = {
+            bodypart_colname: "Bodypart used for the shot (head, foot or other)",
+            dist_colname: "Distance to goal",
+            angle_colname: "Angle to goal",
+            dist_x_angle_colname: "Distance * angle to goal",
+        }
+
+        feature_pipeline = InteractionFeature([dist_colname, angle_colname], dist_x_angle_colname)
+        preprocess_pipeline = simple_proc_for_linear_algoritms(
+            numeric_features=[dist_colname, angle_colname, dist_x_angle_colname],
+            categoric_features=[bodypart_colname],
+        )
+        base_model = LogisticRegression(max_iter=10000, solver="lbfgs", fit_intercept=False)
+        return make_pipeline(feature_pipeline, preprocess_pipeline, base_model)
+
+
+class AdvancedOpenplayXGModel(XGModel):
+    _default_model_filename = "default_openplay_model.xg"
+
+    def __init__(self):
+        self.dataset_transformer = DatasetTransformer(
+            xfns=[fs.shot_dist, fs.shot_visible_angle, fs.shot_bodypart],
+            shotfilter=lambda x: x.type_name == 'shot' and x.result_name in ["fail", "success"],
+        )
+        self.pipeline = self._build_pipeline()
+
+    def _build_pipeline(self) -> Pipeline:
+        bodypart_colname = "bodypart_name_shot"
+        dist_colname = "dist_shot"
+        angle_colname = "visible_angle_shot"
+        dist_x_angle_colname = "dist_x_visible_angle_shot"
+        self.column_descriptions = {
+            bodypart_colname: "Bodypart used for the shot (head, foot or other)",
+            dist_colname: "Distance to goal",
+            angle_colname: "Angle to goal",
+            dist_x_angle_colname: "Distance * angle to goal",
+        }
+        # Derive the interaction column first, as the sibling models do; it is listed below.
+        feature_pipeline = InteractionFeature([dist_colname, angle_colname], dist_x_angle_colname)
+        preprocess_pipeline = simple_proc_for_linear_algoritms(
+            numeric_features=[dist_colname, angle_colname, dist_x_angle_colname],
+            categoric_features=[bodypart_colname],
+        )
+        base_model = LogisticRegression(max_iter=10000, solver="lbfgs", fit_intercept=False)
+        return make_pipeline(feature_pipeline, preprocess_pipeline, base_model)
+
+
+class StatsBombOpenplayXGModel(XGModel):
+    _default_model_filename = "default_openplay_model.xg"
+
+    def __init__(self):
+        self.dataset_transformer = DatasetTransformer(
+            xfns=[fs.shot_dist, fs.shot_visible_angle, fs.shot_bodypart],
+            shotfilter=lambda x: x.type_name == 'shot' and x.result_name in ["fail", "success"],
+        )
+        self.pipeline = self._build_pipeline()
+
+    def _build_pipeline(self) -> Pipeline:
+        bodypart_colname = "bodypart_name_shot"
+        dist_colname = "dist_shot"
+        angle_colname = "visible_angle_shot"
+        dist_x_angle_colname = "dist_x_visible_angle_shot"
+
+        self.column_descriptions = {
+            bodypart_colname: "Bodypart used for the shot (head, foot or other)",
+            dist_colname: "Distance to goal",
+            angle_colname: "Angle to goal",
+            dist_x_angle_colname: "Distance * angle to goal",
+        }
+
+        feature_pipeline = make_pipeline(
+            InteractionFeature([dist_colname, angle_colname], dist_x_angle_colname)
+        )
+        preprocess_pipeline = simple_proc_for_linear_algoritms(
+            numeric_features=[dist_colname, angle_colname, dist_x_angle_colname],
+            categoric_features=[bodypart_colname],
+        )
+        base_model = LogisticRegression(max_iter=10000, solver="lbfgs", fit_intercept=False)
+        return make_pipeline(feature_pipeline, preprocess_pipeline, base_model)
+
+
+class XGModelEnsemble:
+    def __init__(self, models=None):
+        super().__init__()
+        if models is None:
+            models = [BasicOpenplayXGModel(), FreekickXGModel(), PenaltyXGModel()]
+        self.model = models
+        self.column_descriptions = {m.__class__.__name__: m.column_descriptions for m in models}
+
+    def train(self, source_data, training_seasons, target_colname="goal"):
+        for m in self.model:  # keywords: member models take target_colname before training_seasons
+            m.train(source_data, training_seasons=training_seasons, target_colname=target_colname)
+
+    def validate(self, source_data, validation_seasons, target_colname="goal", plot=True):
+        results = {}
+        for model in self.model:
+            results.update(model.validate(source_data, validation_seasons, target_colname, plot))
+        return results
+
+    def estimate(self, source_data, game_ids=None):
+        xg = []
+        for model in self.model:
+            xg.append(model.estimate(source_data, game_ids))
+        return pd.concat(xg).sort_index()
+
+    def save_model(self, filename=None):
+        if filename is None:
+            filename = self._default_model_filename
+        for i, model in enumerate(self.model):
+            model.save_model(filename=f"{filename}_{i}")
 
     @classmethod
-    def filter_shots(cls, df_actions):
-        shot_idx = df_actions.type_name == "shot_freekick"
-        return shot_idx
-
-
-def prepare(
-    dataset: Dataset,
-    game_ids=None,
-    xfns=fs.default_features,
-    yfns=fs.default_labels,
-    shotfilter=None,
-    nb_prev_actions=3,
-    on_fail="raise",
-):
-    """Prepare a dataset for training and validation.
+    def load_model(cls, filename=None):
+        if filename is None:
+            filename = cls._default_model_filename
+
+        models = []
+        i = 0
+        while True:
+            try:
+                models.append(joblib.load(cls.model_directory / f"{filename}_{i}"))
+                i += 1
+            except FileNotFoundError:
+                break
+        return cls(models)
+
+
+def is_shot(action) -> bool:
+    """Return whether a single SPADL action is a shot to handle.
+
+    This function is the default shot filter of ``DatasetTransformer``.
+    It accepts regular shots, penalties and free-kick shots whose result
+    is "fail" or "success"; any other type or result is rejected.
+
+    Parameters
+    ----------
+    action : pd.Series
+        A single SPADL action.
+
+    Returns
+    -------
+    bool
+        True if the action should be handled as a shot, False otherwise.
+    """
+    return (action.type_name in ["shot", "shot_penalty", "shot_freekick"]) and (
+        action.result_name in ["fail", "success"]
+    )
+
+
+class DatasetTransformer:
+    """Transforms a dataset to xG features and labels.
 
     Parameters
     ----------
-    dataset : Dataset
-        The dataset to use.
-    game_ids : list of ints (default=None)
-        Only use data from the games in this list. By default, all games
-        in the dataset are used.
     xfns : list(callable)
         List of feature generators to apply. Defaults to ``default_features``.
     yfns : list(callable)
         List of label generators to apply. Defaults to ``default_labels``.
     shotfilter: callable(pd.Series) -> bool
-        A function that takes a shot (in SPADL format) and returns True if the
-        shot should be used for feature extraction. If None, all shots will be
-        used (excluding own-goals).
+        A function that takes a SPADL action and returns True if the
+        action should be used for feature extraction. If None, all shots will
+        be used (excluding own-goals).
     nb_prev_actions: int
         The number of previous actions to consider when calculating labels
-    on_fail: 'raise' or 'warn'
-        What to do if a feature or label function fails on a specific game.
-
-    Returns
-    -------
-    X : pd.DataFrame
-        A dataframe containing the features.
-    y : pd.DataFrame
-        A dataframe containing the labels.
     """
-    game_ids = dataset.games().index if game_ids is None else game_ids
-    X, y = {}, {}
-    for game_id in tqdm(game_ids, desc="Preparing dataset"):
-        try:
-            game = dataset.games().loc[game_id]
-            game_actions = dataset.actions(game_id)
-            game_events = dataset.events(game_id)
-            X[game_id], y[game_id] = fs.compute_attributes(
-                game,
-                game_actions,
-                events=game_events,
-                xfns=xfns,
-                yfns=yfns,
-                shotfilter=shotfilter,
-                nb_prev_actions=nb_prev_actions,
-            )
-            X[game_id]["game_id"] = game_id
-            y[game_id]["game_id"] = game_id
-        except Exception as e:
-            if on_fail == "warn":
-                warnings.warn(f"Failed for game with id={game_id}: {e}")
-            else:
-                raise RuntimeError(f"Failed for game with id={game_id}.") from e
-    X = pd.concat(X.values()).reset_index().set_index(["game_id", "action_id"])
-    # remove post-shot features (these will all have a single unique value)
-    f = X.columns[X.nunique() > 1]
-    y = pd.concat(y.values()).reset_index().set_index(["game_id", "action_id"])
-    return X[f], y
+
+    def __init__(
+        self,
+        xfns: list[fs.AttributeGenerator | str] = fs.default_features,
+        yfns: list[fs.AttributeGenerator | str] = fs.default_labels,
+        shotfilter: Callable[[pd.Series], bool] = is_shot,
+        nb_prev_actions: int = 3,
+    ):
+        self.xfns = xfns
+        self.yfns = yfns
+        self.shotfilter = shotfilter
+        self.nb_prev_actions = nb_prev_actions
+
+    def transform(
+        self,
+        dataset: Dataset,
+        game_ids: list[int] | None = None,
+        on_fail: Literal["raise", "warn"] = "raise",
+    ) -> tuple[pd.DataFrame, pd.DataFrame]:
+        """Prepare a dataset for training and validation.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            The dataset to use.
+        game_ids : list of ints (default=None)
+            Only use data from the games in this list. By default, all games
+            in the dataset are used.
+        on_fail: 'raise' or 'warn'
+            What to do if a feature or label function fails on a specific game.
+
+        Returns
+        -------
+        X : pd.DataFrame
+            A dataframe containing the features (empty if no game yielded a shot).
+        y : pd.DataFrame
+            A dataframe containing the labels (empty if no game yielded a shot).
+        """
+        games = dataset.games()
+        game_ids = games.index.tolist() if game_ids is None else game_ids
+        X, y = {}, {}
+        for game_id in tqdm(game_ids, desc="Preparing dataset"):
+            try:
+                game = games.loc[game_id]
+                game_actions = dataset.actions(game_id)
+                game_events = dataset.events(game_id)
+                _X, _y = fs.compute_attributes(
+                    game,
+                    game_actions,
+                    events=game_events,
+                    xfns=self.xfns,
+                    yfns=self.yfns,
+                    shotfilter=self.shotfilter,
+                    nb_prev_actions=self.nb_prev_actions,
+                )
+                _X["game_id"] = game_id
+                _y["game_id"] = game_id
+                if len(_X) and len(_y):
+                    X[game_id] = _X
+                    y[game_id] = _y
+            except Exception as e:
+                if on_fail == "warn":
+                    warnings.warn(f"Failed for game with id={game_id}: {e}", stacklevel=2)
+                else:
+                    raise RuntimeError(f"Failed for game with id={game_id}.") from e
+        if not X:  # no game yielded a shot; pd.concat raises on an empty sequence
+            idx = pd.MultiIndex.from_arrays([[], []], names=["game_id", "action_id"])
+            return pd.DataFrame(index=idx), pd.DataFrame(index=idx)
+        X = pd.concat(X.values()).reset_index().set_index(["game_id", "action_id"])
+        y = pd.concat(y.values()).reset_index().set_index(["game_id", "action_id"])
+        return X, y