From 9ea1105442e5b17a594a8c87d16e3bd8542f27b6 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Fri, 19 Jul 2024 22:33:42 +0200 Subject: [PATCH] changes to class names, and tutorials --- .gitignore | 2 + examples/0_getting_started.ipynb | 133 +++++++ ...ipynb => 1_tutorial_graph_converter.ipynb} | 354 ++++++++---------- tests/test_kloppy.py | 4 +- unravel/soccer/graphs/graph_converter.py | 8 +- unravel/soccer/graphs/objects/__init__.py | 2 +- ..._dataset.py => custom_spektral_dataset.py} | 10 +- 7 files changed, 303 insertions(+), 210 deletions(-) create mode 100644 examples/0_getting_started.ipynb rename examples/{getting_started.ipynb => 1_tutorial_graph_converter.ipynb} (76%) rename unravel/soccer/graphs/objects/{custom_graph_dataset.py => custom_spektral_dataset.py} (94%) diff --git a/.gitignore b/.gitignore index 0a27a48..def68e3 100644 --- a/.gitignore +++ b/.gitignore @@ -172,6 +172,8 @@ dev.py bug.py scratch.py TODO.md +BUILD.md +build.py /.data /pickle_files diff --git a/examples/0_getting_started.ipynb b/examples/0_getting_started.ipynb new file mode 100644 index 0000000..1c18e55 --- /dev/null +++ b/examples/0_getting_started.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🌀 It's all starting to unravel!\n", + "\n", + "First run `pip install unravelsports` if you haven't already!\n", + "\n", + "\n", + "-----\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install unravelsports --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
Quick Start" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing frames: 100%|██████████| 500/500 [00:02<00:00, 221.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading 477 graphs into CustomSpektralDataset...\n", + "Loading 477 graphs into CustomSpektralDataset...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from unravel.soccer import GraphConverter, CustomSpektralDataset\n", + "\n", + "from kloppy import skillcorner\n", + "\n", + "from unravel.utils import dummy_labels, dummy_graph_ids\n", + "\n", + "# Load Kloppy dataset\n", + "kloppy_dataset = skillcorner.load_open_data(\n", + " include_empty_frames=False,\n", + " limit=500, # limit to 500 frames in this example\n", + ")\n", + "\n", + "# Initialize the Graph Converter, with dataset, labels and settings\n", + "converter = GraphConverter(\n", + " dataset=kloppy_dataset,\n", + " labels=dummy_labels(kloppy_dataset)\n", + ")\n", + "\n", + "# Compute the graphs and add them to the CustomSpektralDataset\n", + "dataset = CustomSpektralDataset(\n", + " data=converter.to_spektral_graphs()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from spektral.data import DisjointLoader\n", + "\n", + "N, F, S, n_out, n = dataset.dimensions()\n", + "\n", + "train, test = dataset.split_test_train(\n", + " split_train=4, split_test=1, random_seed=42\n", + ")\n", + "\n", + "loader_tr = DisjointLoader(train, batch_size=16, epochs=150)\n", + "loader_te = DisjointLoader(test, batch_size=16, epochs=1, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. 
Build GNN Model\n", + "\n", + "For a functional implementation of a Graph Neural Network see the [Full Graph Converter Tutorial](1_tutorial_graph_converter.ipynb)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/getting_started.ipynb b/examples/1_tutorial_graph_converter.ipynb similarity index 76% rename from examples/getting_started.ipynb rename to examples/1_tutorial_graph_converter.ipynb index 8832058..d44c561 100644 --- a/examples/getting_started.ipynb +++ b/examples/1_tutorial_graph_converter.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 🌀 It's all starting to unravel!\n", + "## 🌀 unraveling the Graph Converter!\n", "\n", "First run `pip install unravelsports` if you haven't already!\n", "\n", @@ -18,7 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install unravelsports" + "%pip install unravelsports --quiet" ] }, { @@ -105,10 +105,14 @@ "- `graph_id`. This is a single identifier (str or int) for a whole match, for example the unique match id.\n", "- `graph_ids`. This is a dictionary with the same keys as `labels`, but the values are now the unique identifiers. This option can be used if we want to split by sequence or possession_id. For example: {frame_id: 'matchId-sequenceId', frame_id: 'match_Id-sequenceId2'} etc. You will need to create your own possession/sequence ids. Note, if `labels` and `graph_ids` don't have the exact same keys it will throw an error. 
In this example we'll use the `graph_id=match_id` as the unique identifier, but feel free to change that for `graph_ids=dummy_graph_ids(dataset)` to test out that behavior.\n", "\n", - "Correctly splitting the final dataset in train, test and validiation sets is incorporated into `CustomGraphDataset` (see section 7 for more information).\n", + "Correctly splitting the final dataset in train, test and validation sets is incorporated into `CustomSpektralDataset` (see section 7 for more information).\n", "\n", "\n", "#### Graph Converter Settings:\n", + "\n", + "
\n", + " 🌀 Expand for a full table of additional optional GraphConverter parameters
\n", + "\n", "| Parameter | Type | Description | Default |\n", "|-----------|------|-------------|---------|\n", "| `ball_carrier_threshold` | float | The distance threshold to determine the ball carrier in meters. If no ball carrier within ball_carrier_threshold, we skip the frame. | 25.0 |\n", @@ -125,15 +129,19 @@ "| `label_type` | str | The type of prediction label used. Currently only supports 'binary' | 'binary' |\n", "| `random_seed` | int, bool | When a random_seed is given, it will randomly shuffle an individual Graph without changing the underlying structure. When set to True, it will shuffle every frame differently; False won't shuffle. Advised to set True when creating an actual dataset to support Permutation Invariance. | False |\n", "| `pad` | bool | True pads to a total amount of 22 players and ball (so 23x23 adjacency matrix). It dynamically changes the edge feature padding size based on the combination of AdjacencyMatrixConnectType and AdjacencyMatrixType, and self_loop_ball. No need to set padding because smaller and larger graphs can all be used in the same dataset. | False |\n", - "| `verbose` | bool | The converter logs warnings / error messages when specific frames have no coordinates, or other missing information. False mutes all of these warnings. | False |" + "| `verbose` | bool | The converter logs warnings / error messages when specific frames have no coordinates, or other missing information. False mutes all of these warnings. | False |\n", + "\n", + "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### 4.1 What even is a Graph\n", + "#### 4.1 What is a Graph?\n", "\n", + "
\n", + " 🌀 Expand for a short explanation of Graphs \n", "
\n", "
\n", "\n", @@ -150,15 +158,16 @@ "\n", "The image on the right represents a stylized version of a frame of tracking data in soccer.\n", "\n", - "After completing step 6 and 7 we can see what this looks like in Python.\n", + "In section 6.1 we can see what this looks like in Python.\n", "\n", "
\n", "
\n", "\n", - "![Graph representation](https://github.com/UnravelSports/unravelsports.github.io/blob/main/imgs/what-is-a-graph-3.png?raw=true)\n", + "![Graph representation](https://github.com/UnravelSports/unravelsports.github.io/blob/main/imgs/what-is-a-graph-4.png?raw=true)\n", "\n", "
\n", - "
" + "\n", + "
" ] }, { @@ -206,20 +215,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Processing frames: 100%|██████████| 500/500 [00:03<00:00, 147.38it/s]\n", - "Processing frames: 100%|██████████| 500/500 [00:02<00:00, 249.61it/s]\n", - "Processing frames: 100%|██████████| 500/500 [00:01<00:00, 313.20it/s] \n", - "Processing frames: 100%|██████████| 500/500 [00:02<00:00, 244.34it/s]\n" - ] - } - ], + "outputs": [], "source": [ "from os.path import exists\n", "\n", @@ -241,7 +239,7 @@ " limit=500, # limit to 500 frames in this example\n", " )\n", "\n", - " # Initialize the GNN Converter, with dataset, labels and settings\n", + " # Initialize the Graph Converter, with dataset, labels and settings\n", " converter = GraphConverter(\n", " dataset=dataset,\n", " # create fake labels\n", @@ -276,20 +274,19 @@ "-------\n", "### 6. Creating a Custom Graph Dataset\n", "\n", - "- `CustomGraphDataset` (or `CounterDataset` as it's named in [U.S. Soccer Federation GNN Repository](https://github.com/USSoccerFederation/ussf_ssac_23_soccer_gnn/blob/main/counterattack.ipynb)) is a [`spektral.data.Dataset`](https://graphneural.network/creating-dataset/). \n", + "- `CustomSpektralDataset` (or `CounterDataset` as it's named in [U.S. Soccer Federation GNN Repository](https://github.com/USSoccerFederation/ussf_ssac_23_soccer_gnn/blob/main/counterattack.ipynb)) is a [`spektral.data.Dataset`](https://graphneural.network/creating-dataset/). \n", "This type of dataset is required to properly load and train a Spektral GNN.\n", - "- The `CustomGraphDataset` has a custom method `add()` that allows us to update to add more Graphs. This is useful because we can load an individual match pickle file and add/update the graphs directly to `dataset`. 
(Note this `dataset` is different than the previoulsy loaded Kloppy dataset!)" + "- The `CustomSpektralDataset` has a custom method `add()` that allows us to update to add more Graphs. This is useful because we can load an individual match pickle file and add/update the graphs directly to `dataset`. (Note this `dataset` is different than the previoulsy loaded Kloppy dataset!)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", - "\n", "def load_pickle(file_path):\n", " with open(file_path, \"rb\") as file:\n", " # Deserialize the object from the file\n", @@ -299,32 +296,38 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading 477 graphs into CustomGraphDataset...\n", - "Loading 477 graphs into CustomGraphDataset...\n", - "Adding 380 graphs to CustomGraphDataset...\n", - "Adding 336 graphs to CustomGraphDataset...\n", - "Adding 411 graphs to CustomGraphDataset...\n", - "Complete: CustomGraphDataset(n_graphs=1604)\n" + "Loading 477 graphs into CustomSpektralDataset...\n", + "Loading 477 graphs into CustomSpektralDataset...\n", + "Adding 380 graphs to CustomSpektralDataset...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adding 336 graphs to CustomSpektralDataset...\n", + "Adding 411 graphs to CustomSpektralDataset...\n", + "Complete: CustomSpektralDataset(n_graphs=1604)\n" ] } ], "source": [ - "from unravel.soccer import CustomGraphDataset\n", + "from unravel.soccer import CustomSpektralDataset\n", "\n", - "dataset: CustomGraphDataset = None\n", + "dataset: CustomSpektralDataset = None\n", "\n", "for match_id in match_ids:\n", " graph_data = load_pickle(file_path=pickle_file_path.format(match_id=match_id))\n", "\n", " if not dataset:\n", - " dataset = CustomGraphDataset(data=graph_data)\n", + " dataset = 
CustomSpektralDataset(data=graph_data)\n", " else:\n", " dataset.add(graph_data, verbose=True)\n", "\n", @@ -335,44 +338,30 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### 6.1 Graphs in the CustomGraphDataset\n", + "#### 6.1 Graphs in the CustomSpektralDataset\n", "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's have a look at the internals of our `CustomGraphDataset`. \n", + "
\n", + " 🌀 Expand for a short explanation on CustomSpektralDataset
\n", + "\n", + "\n", + "##### CustomSpektralDataset\n", + "Let's have a look at the internals of our `CustomSpektralDataset`. \n", + "\n", + "The first item in our dataset has 23 nodes, 12 features per node and 7 features per edge.\n", + "\n", + "
\n", + "\n", + "```python\n", + "dataset.graphs[0]\n", + "\n", + ">>> Graph(n_nodes=23, n_node_features=12, n_edge_features=7, n_labels=1)\n", + "```\n", + "
\n", + "
\n", + "
\n", + "
\n", + " 🌀 Expand for a short explanation on the representation of the adjacency matrix
\n", "\n", - "The first item in our dataset has 23 nodes, 12 features per node and 7 features per edge." - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Graph(n_nodes=23, n_node_features=12, n_edge_features=7, n_labels=1)" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset.graphs[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "##### Adjacency Matrix\n", "The **adjacency matrix** is represented as a [compressed sparse row matrix](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html#scipy.sparse.csr_matrix), as required by Spektral. A 'normal' version of this same matrix would be of shape 23x23 filled with zero's and only one's in places where two players (or ball) are connected. \n", "\n", @@ -383,34 +372,21 @@ " - Ball connected to ball (1)\n", "- `adjacency_matrix_connect_type=\"ball\"`\n", " - All players and the ball (22) \n", - " - The ball and all players (22)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset.graphs[0].a" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + " - The ball and all players (22)\n", + "\n", + "
\n", + "\n", + "```python\n", + "dataset.graphs[0].a\n", + ">>> \n", + "```\n", + "
\n", + "
\n", + "
\n", + "
\n", + " 🌀 Expand for a short explanation on the representation of the node feature matrix
\n", + "\n", "##### Node Features\n", "The **node features** are described using a regular Numpy array. Each column represents one feature and every row represents one player. \n", "\n", @@ -418,99 +394,81 @@ "\n", "See the bullet points in **5. Load Kloppy Data, Convert and Store** to learn which column represents which feature.\n", "\n", - "The rows filled with zero's are 'empty' players created because we set `pad=True`. Graph Neural Networks are flexible enough to deal with all sorts of different graph shapes in the same dataset, normally it's not actually necessary to add these empty players, even for incomplete data with only a couple players in frame." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[-0.163 -0.135 0.245 -0.97 0.007 0.289 0.959 0.191 0.059 0.376 1. 1. ]\n", - " [-0.332 0.011 -0.061 0.998 0.02 0.76 1.015 0.177 0.029 0.009 1. 0.1 ]\n", - " [ 0.021 -0.072 0.987 -0.162 0.017 0.474 0.88 0.203 0.121 0.468 1. 1. ]\n", - " [-0.144 0.232 0.343 0.939 0.024 0.694 0.924 0.186 0.077 0.638 1. 1. ]\n", - " [-0.252 0.302 0.99 0.141 0.032 0.523 0.964 0.176 0.078 0.741 1. 1. ]\n", - " [ 0.012 0.573 0.834 -0.551 0.035 0.407 0.842 0.191 0.19 0.646 1. 1. ]\n", - " [-0.293 0.686 0.999 -0.045 0.044 0.493 0.966 0.163 0.182 0.761 1. 1. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", - " ...\n", - " [ 0.202 0.124 -0.874 0.486 0.024 0.919 0.791 0.214 0.197 0.524 0.1 0.1 ]\n", - " [ 0.404 0.143 -0.997 0.08 0.029 0.987 0.709 0.23 0.281 0.519 0.1 0.1 ]\n", - " [ 0.195 -0.391 0.48 -0.877 0.014 0.33 0.847 0.218 0.222 0.417 0.1 0.1 ]\n", - " [ 0.212 -0.063 0.982 -0.187 0.009 0.47 0.804 0.217 0.2 0.483 0.1 0.1 ]\n", - " [-0.03 0.248 -0.996 0.091 0.021 0.986 0.876 0.194 0.116 0.591 0.1 0.1 ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 
0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", - " [-0.262 0.016 0.937 -0.35 0.036 0.443 0.986 0.044 0. 0. 0. 0. ]]\n", - "(23, 12)\n" - ] - } - ], - "source": [ - "# We're applying some settings, and rounding to show the matrix better\n", - "import numpy as np\n", - "np.set_printoptions(edgeitems=10, linewidth=100000, threshold=1)\n", + "The rows filled with zero's are 'empty' players created because we set `pad=True`. Graph Neural Networks are flexible enough to deal with all sorts of different graph shapes in the same dataset, normally it's not actually necessary to add these empty players, even for incomplete data with only a couple players in frame.\n", + "\n", + "
\n", + "\n", + "```python\n", + "dataset.graphs[0].x\n", + ">>> [[-0.163 -0.135 0.245 -0.97 0.007 0.289 0.959 0.191 0.059 0.376 1. 1. ]\n", + " [-0.332 0.011 -0.061 0.998 0.02 0.76 1.015 0.177 0.029 0.009 1. 0.1 ]\n", + " [ 0.021 -0.072 0.987 -0.162 0.017 0.474 0.88 0.203 0.121 0.468 1. 1. ]\n", + " [-0.144 0.232 0.343 0.939 0.024 0.694 0.924 0.186 0.077 0.638 1. 1. ]\n", + " [-0.252 0.302 0.99 0.141 0.032 0.523 0.964 0.176 0.078 0.741 1. 1. ]\n", + " [ 0.012 0.573 0.834 -0.551 0.035 0.407 0.842 0.191 0.19 0.646 1. 1. ]\n", + " [-0.293 0.686 0.999 -0.045 0.044 0.493 0.966 0.163 0.182 0.761 1. 1. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " ...\n", + " [ 0.202 0.124 -0.874 0.486 0.024 0.919 0.791 0.214 0.197 0.524 0.1 0.1 ]\n", + " [ 0.404 0.143 -0.997 0.08 0.029 0.987 0.709 0.23 0.281 0.519 0.1 0.1 ]\n", + " [ 0.195 -0.391 0.48 -0.877 0.014 0.33 0.847 0.218 0.222 0.417 0.1 0.1 ]\n", + " [ 0.212 -0.063 0.982 -0.187 0.009 0.47 0.804 0.217 0.2 0.483 0.1 0.1 ]\n", + " [-0.03 0.248 -0.996 0.091 0.021 0.986 0.876 0.194 0.116 0.591 0.1 0.1 ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", + " [-0.262 0.016 0.937 -0.35 0.036 0.443 0.986 0.044 0. 0. 0. 0. ]]\n", + "\n", + " \n", + "dataset.graphs[0].x.shape\n", + ">>> (23, 12)\n", + "```\n", + "
\n", + "
\n", + "
\n", + "
\n", + " 🌀 Expand for a short explanation on the representation of the edge feature matrix
\n", "\n", - "print(np.round(dataset.graphs[0].x, 3))\n", - "print(dataset.graphs[0].x.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "##### Edge Features\n", "The **edge features** are also represented in a regular Numpy array. Again, each column represents one feature, and every row decribes the connection between two players, or player and ball.\n", "\n", - "We saw before how the **adjacency matrix** was presented in a Sparse Row Matrix with 287 rows. It is no coincidence this lines up perfectly with the **edge feature matrix**. " - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0. 0. 1. 0.5 0.5 1. 0. ]\n", - " [ 0.081 0.006 0.936 0.255 0.21 0.907 1. ]\n", - " [ 0.079 0.004 0.012 0.391 0. 0.515 1. ]\n", - " [ 0.1 0.007 0.46 0.002 0.005 0.571 1. ]\n", - " [ 0.125 0.011 0.65 0.023 0.474 0.999 0. ]\n", - " [ 0.206 0.012 0.322 0.033 0.535 0.999 0. ]\n", - " [ 0.23 0.016 0.619 0.014 0.567 0.996 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. ]\n", - " [ 0. 0. 0. 0. 0. 0. 0. ]\n", - " ...\n", - " [ 0.197 -0.025 0.005 0.426 0.929 0.757 1. ]\n", - " [ 0.281 -0.023 0.004 0.439 0.959 0.699 1. ]\n", - " [ 0.222 -0.03 0.067 0.75 0.979 0.643 1. ]\n", - " [ 0.2 -0.032 0.003 0.554 0.982 0.633 1. ]\n", - " [ 0.116 -0.026 0.08 0.229 0.82 0.884 1. ]\n", - " [ 0. 0. 0. 0. 0. 0. 1. ]\n", - " [ 0. 0. 0. 0. 0. 0. 1. ]\n", - " [ 0. 0. 0. 0. 0. 0. 1. ]\n", - " [ 0. 0. 0. 0. 0. 0. 1. ]\n", - " [ 0. 0. 1. 0.5 0.5 1. 1. ]]\n", - "(287, 7)\n" - ] - } - ], - "source": [ - "print(np.round(dataset.graphs[0].e, 3))\n", - "print(dataset.graphs[0].e.shape)" + "We saw before how the **adjacency matrix** was presented in a Sparse Row Matrix with 287 rows. It is no coincidence this lines up perfectly with the **edge feature matrix**. \n", + "\n", + "
\n", + "\n", + "```python\n", + "dataset.graphs[0].e\n", + ">>> [[ 0. 0. 1. 0.5 0.5 1. 0. ]\n", + " [ 0.081 0.006 0.936 0.255 0.21 0.907 1. ]\n", + " [ 0.079 0.004 0.012 0.391 0. 0.515 1. ]\n", + " [ 0.1 0.007 0.46 0.002 0.005 0.571 1. ]\n", + " [ 0.125 0.011 0.65 0.023 0.474 0.999 0. ]\n", + " [ 0.206 0.012 0.322 0.033 0.535 0.999 0. ]\n", + " [ 0.23 0.016 0.619 0.014 0.567 0.996 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. ]\n", + " [ 0. 0. 0. 0. 0. 0. 0. ]\n", + " ...\n", + " [ 0.197 -0.025 0.005 0.426 0.929 0.757 1. ]\n", + " [ 0.281 -0.023 0.004 0.439 0.959 0.699 1. ]\n", + " [ 0.222 -0.03 0.067 0.75 0.979 0.643 1. ]\n", + " [ 0.2 -0.032 0.003 0.554 0.982 0.633 1. ]\n", + " [ 0.116 -0.026 0.08 0.229 0.82 0.884 1. ]\n", + " [ 0. 0. 0. 0. 0. 0. 1. ]\n", + " [ 0. 0. 0. 0. 0. 0. 1. ]\n", + " [ 0. 0. 0. 0. 0. 0. 1. ]\n", + " [ 0. 0. 0. 0. 0. 0. 1. ]\n", + " [ 0. 0. 1. 0.5 0.5 1. 1. ]]\n", + "\n", + " dataset.graphs[0].e.shape\n", + " (287, 7)\n", + "```\n", + "
\n", + "
\n", + "\n" ] }, { @@ -520,7 +478,7 @@ "---------\n", "### 7. Prepare for Training\n", "\n", - "Now that we have all the data converted as Graphs inside our `CustomGraphDataset` object, we can prepare to train the GNN model.\n", + "Now that we have all the data converted as Graphs inside our `CustomSpektralDataset` object, we can prepare to train the GNN model.\n", "\n", "We first get all necessary information from our dataset that we need to train our model, namely:\n", "- N = Max amount of nodes in a single graph\n", @@ -565,9 +523,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Train: CustomGraphDataset(n_graphs=791)\n", - "Test: CustomGraphDataset(n_graphs=477)\n", - "Validation: CustomGraphDataset(n_graphs=336)\n" + "Train: CustomSpektralDataset(n_graphs=791)\n", + "Test: CustomSpektralDataset(n_graphs=477)\n", + "Validation: CustomSpektralDataset(n_graphs=336)\n" ] } ], @@ -707,11 +665,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Loss: 8.258899688720703\n", - "Loss: 2.5070531368255615\n", - "Loss: 0.9786378741264343\n", - "Loss: 0.7528772950172424\n", - "Loss: 0.7192981243133545\n" + "Loss: 9.539023399353027\n", + "Loss: 1.9100297689437866\n", + "Loss: 1.2216181755065918\n", + "Loss: 1.3887213468551636\n", + "Loss: 0.9166012406349182\n" ] } ], diff --git a/tests/test_kloppy.py b/tests/test_kloppy.py index 94d8140..6a03590 100644 --- a/tests/test_kloppy.py +++ b/tests/test_kloppy.py @@ -1,5 +1,5 @@ from pathlib import Path -from unravel.soccer import GraphConverter, CustomGraphDataset, GraphFrame +from unravel.soccer import GraphConverter, CustomSpektralDataset, GraphFrame from unravel.utils import DefaultTrackingModel, dummy_labels, dummy_graph_ids from kloppy import skillcorner @@ -176,7 +176,7 @@ def test_to_spektral_graph(self, gnnc: GraphConverter): assert 1.0 == pytest.approx(a[0, 4], abs=1e-5) assert 0.0 == pytest.approx(a[8, 2], abs=1e-5) - dataset = CustomGraphDataset(data=spektral_graphs) + dataset = 
CustomSpektralDataset(data=spektral_graphs) N, F, S, n_out, n = dataset.dimensions() assert N == 21 assert F == 12 diff --git a/unravel/soccer/graphs/graph_converter.py b/unravel/soccer/graphs/graph_converter.py index 99ace96..3b08b65 100644 --- a/unravel/soccer/graphs/graph_converter.py +++ b/unravel/soccer/graphs/graph_converter.py @@ -29,7 +29,7 @@ KeyMismatchError, ) -from .objects import GraphSettings, GraphFrame, CustomGraphDataset +from .objects import GraphSettings, GraphFrame, CustomSpektralDataset from ...utils import DefaultTrackingModel, NoLabelWarning @@ -51,7 +51,7 @@ class GraphConverter: graph_id (str, int): Set a single id for the whole Kloppy dataset. graph_ids (dict): Frame level control over graph ids. - The graph_ids will be used to assign each graph an identifier. This identifier allows us to split the CustomGraphDataset such that + The graph_ids will be used to assign each graph an identifier. This identifier allows us to split the CustomSpektralDataset such that all graphs with the same id are either all in the test, train or validation set to avoid leakage. It is recommended to either set graph_id (int, str) as a match_id, or pass a dictionary into 'graph_ids' with exactly the same keys as 'labels' for more granualar control over the graph ids. The latter can be useful when splitting graphs by possession or sequence id. In this case the dict would be {frame_id: sequence_id/possession_id}. 
@@ -270,12 +270,12 @@ def to_spektral_graphs(self) -> List[Graph]: spektral_graphs = [g.to_spektral_graph() for g in self.graph_frames] return spektral_graphs - def to_custom_dataset(self) -> CustomGraphDataset: + def to_custom_dataset(self) -> CustomSpektralDataset: """ Spektral requires a spektral Dataset to load the data for docs see https://graphneural.network/creating-dataset/ """ - return CustomGraphDataset(data=self.to_spektral_graphs()) + return CustomSpektralDataset(data=self.to_spektral_graphs()) def to_pickle(self, file_path: str) -> None: """ diff --git a/unravel/soccer/graphs/objects/__init__.py b/unravel/soccer/graphs/objects/__init__.py index 82e89e6..5a927f5 100644 --- a/unravel/soccer/graphs/objects/__init__.py +++ b/unravel/soccer/graphs/objects/__init__.py @@ -1,3 +1,3 @@ from .graph_frame import GraphFrame from .graph_settings import GraphSettings -from .custom_graph_dataset import CustomGraphDataset +from .custom_spektral_dataset import CustomSpektralDataset diff --git a/unravel/soccer/graphs/objects/custom_graph_dataset.py b/unravel/soccer/graphs/objects/custom_spektral_dataset.py similarity index 94% rename from unravel/soccer/graphs/objects/custom_graph_dataset.py rename to unravel/soccer/graphs/objects/custom_spektral_dataset.py index 96fa07e..4973ec9 100644 --- a/unravel/soccer/graphs/objects/custom_graph_dataset.py +++ b/unravel/soccer/graphs/objects/custom_spektral_dataset.py @@ -21,9 +21,9 @@ logger.addHandler(stdout_handler) -class CustomGraphDataset(Dataset, Sequence): +class CustomSpektralDataset(Dataset, Sequence): """ - A CustomGraphDataset is required to use all Spektral funcitonality, see 'spektral.data -> Dataset' + A CustomSpektralDataset is required to use all Spektral funcitonality, see 'spektral.data -> Dataset' """ def __init__(self, **kwargs): @@ -61,7 +61,7 @@ def read(self) -> List[Graph]: """ data = self.__convert(self.data) - logger.info(f"Loading {len(data)} graphs into CustomGraphDataset...") + logger.info(f"Loading 
{len(data)} graphs into CustomSpektralDataset...") return data @@ -69,7 +69,7 @@ def add(self, other, verbose: bool = False): other = self.__convert(other) if verbose: - logger.info(f"Adding {len(other)} graphs to CustomGraphDataset...") + logger.info(f"Adding {len(other)} graphs to CustomSpektralDataset...") self.graphs = self.graphs + other @@ -148,7 +148,7 @@ def split_test_train_validation( num_test = dataset_length - num_train num_validation = 0 - unique_graph_ids = set([g.get("id")[0] for g in self]) + unique_graph_ids = set([g.get("id") if hasattr(g, "id") else None for g in self]) if unique_graph_ids == {None}: by_graph_id = False