From 14dbe17eafaecc9e65a9118f0e1e915da9dc65b4 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 16 Jan 2024 09:18:58 +0100 Subject: [PATCH] Update data loading notebooks --- .../1-load-and-convert-statsbomb-data.ipynb | 323 ++++++++---------- .../1-load-and-convert-wyscout-data.ipynb | 302 ++++++++++++---- 2 files changed, 383 insertions(+), 242 deletions(-) diff --git a/notebooks/1-load-and-convert-statsbomb-data.ipynb b/notebooks/1-load-and-convert-statsbomb-data.ipynb index f6ca5db..3eee4c0 100644 --- a/notebooks/1-load-and-convert-statsbomb-data.ipynb +++ b/notebooks/1-load-and-convert-statsbomb-data.ipynb @@ -155,6 +155,7 @@ " 'La Liga',\n", " 'Liga Profesional',\n", " 'Ligue 1',\n", + " 'Major League Soccer',\n", " 'NWSL',\n", " 'North American League',\n", " 'Premier League',\n", @@ -304,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "tags": [] }, @@ -320,7 +321,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [12:45<00:00, 2.50s/it]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [10:19<00:00, 2.02s/it]\n" ] }, { @@ -334,7 +335,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [15:25<00:00, 2.44s/it]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [12:13<00:00, 1.93s/it]\n" ] }, { @@ -348,7 +349,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [15:33<00:00, 2.46s/it]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [12:04<00:00, 1.91s/it]\n" ] }, { @@ -362,7 +363,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [15:46<00:00, 2.51s/it]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [13:02<00:00, 2.08s/it]\n" ] }, { @@ -376,7 +377,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [15:54<00:00, 2.51s/it]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [14:30<00:00, 2.29s/it]\n" ] } ], @@ -409,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -561,7 +562,7 @@ "3890411 0 Olympiastadion Berlin Peter Sippel " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -572,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -638,7 +639,7 @@ "172 Augsburg" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -649,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -686,33 +687,33 @@ " \n", " \n", " \n", - " 20662\n", + " 3053\n", " 181\n", - " Alexander Nübel\n", + " Leroy Sané\n", " NaN\n", " \n", " \n", - " 37972\n", - " 175\n", - " Jens Grahl\n", - " NaN\n", + " 3499\n", + " 181\n", + " Jean-Eric Maxim Choupo-Moting\n", + " Eric Maxim Choupo-Moting\n", " \n", " \n", - " 15146\n", - " 178\n", - " Mike-Steven Bähre\n", - " Mike Bähre\n", + " 3502\n", + " 181\n", + " Joël Andre Job Matip\n", + " Joël Matip\n", " \n", " \n", - " 401018\n", - " 184\n", - " Nico Rinderknecht\n", + " 3510\n", + " 181\n", + " Sead Kolašinac\n", " NaN\n", " \n", " \n", - " 28601\n", - " 174\n", - " Marvin Wanitzek\n", + " 5242\n", + " 181\n", + " Younès Belhanda\n", " NaN\n", " \n", " \n", @@ -720,16 +721,16 @@ "" ], "text/plain": [ - " team_id player_name nickname\n", - "player_id \n", - "20662 181 Alexander Nübel NaN\n", - "37972 175 Jens Grahl NaN\n", - "15146 178 Mike-Steven Bähre Mike Bähre\n", - "401018 184 Nico Rinderknecht NaN\n", - "28601 174 Marvin Wanitzek NaN" + " team_id player_name nickname\n", + "player_id \n", + "3053 181 Leroy Sané NaN\n", + "3499 181 Jean-Eric Maxim Choupo-Moting Eric Maxim Choupo-Moting\n", + "3502 181 Joël Andre Job Matip Joël Matip\n", + "3510 181 Sead Kolašinac NaN\n", + "5242 181 Younès Belhanda NaN" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -740,7 +741,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -764,7 +765,7 @@ " \n", " \n", " \n", - " \n", + " game_id\n", " period_id\n", " team_id\n", " player_id\n", @@ -774,7 +775,6 @@ " timestamp\n", " minute\n", " second\n", - " possession\n", " ...\n", " team_name\n", " duration\n", @@ -788,7 +788,6 @@ " counterpress\n", " \n", " \n", - " game_id\n", " event_id\n", " \n", " \n", @@ -815,18 +814,17 @@ " \n", " \n", " \n", - " 3890561\n", " 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e\n", + " 3890561\n", " 1\n", " 175\n", " NaN\n", " 35\n", " Starting XI\n", " 1\n", - " 1900-01-01 00:00:00.000\n", + " 0 days 00:00:00\n", " 0\n", " 0\n", - " 1\n", " ...\n", " Hoffenheim\n", " 0.000000\n", @@ -841,16 +839,16 @@ " \n", " \n", " fbca533d-f3f4-4a86-b4a3-4fcae63592cf\n", + " 3890561\n", " 1\n", " 181\n", " NaN\n", " 35\n", " Starting XI\n", " 2\n", - " 1900-01-01 00:00:00.000\n", + " 0 days 00:00:00\n", " 0\n", " 0\n", - " 1\n", " ...\n", " Schalke 04\n", " 0.000000\n", @@ -865,16 +863,16 @@ " \n", " \n", " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01\n", + " 3890561\n", " 1\n", " 175\n", " NaN\n", " 18\n", " Half Start\n", " 3\n", - " 1900-01-01 00:00:00.000\n", + " 0 days 00:00:00\n", " 0\n", " 0\n", - " 1\n", " ...\n", " Hoffenheim\n", " 0.000000\n", @@ -889,16 +887,16 @@ " \n", " \n", " 442128f8-2e38-491c-bf1e-b336e91757fa\n", + " 3890561\n", " 1\n", " 181\n", " NaN\n", " 18\n", " Half Start\n", " 4\n", - " 1900-01-01 00:00:00.000\n", + " 0 days 00:00:00\n", " 0\n", " 0\n", - " 1\n", " ...\n", " Schalke 04\n", " 0.000000\n", @@ -913,16 +911,16 @@ " \n", " \n", " 644e16d7-10ca-45f0-8128-fc0055d6f753\n", + " 3890561\n", " 1\n", " 175\n", " 8387.0\n", " 30\n", " Pass\n", " 5\n", - " 1900-01-01 00:00:00.482\n", + " 0 days 00:00:00.482000\n", " 0\n", " 0\n", - " 2\n", " ...\n", " Hoffenheim\n", " 0.453238\n", @@ -937,102 +935,86 @@ " \n", " \n", "\n", - "

5 rows × 24 columns

\n", + "

5 rows × 25 columns

\n", "" ], "text/plain": [ - " period_id team_id player_id \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 1 175 NaN \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf 1 181 NaN \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 1 175 NaN \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa 1 181 NaN \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 1 175 8387.0 \n", - "\n", - " type_id type_name index \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 35 Starting XI 1 \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf 35 Starting XI 2 \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 18 Half Start 3 \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa 18 Half Start 4 \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 30 Pass 5 \n", + " game_id period_id team_id player_id \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 3890561 1 175 NaN \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf 3890561 1 181 NaN \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 3890561 1 175 NaN \n", + "442128f8-2e38-491c-bf1e-b336e91757fa 3890561 1 181 NaN \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 3890561 1 175 8387.0 \n", "\n", - " timestamp minute \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 1900-01-01 00:00:00.000 0 \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf 1900-01-01 00:00:00.000 0 \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 1900-01-01 00:00:00.000 0 \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa 1900-01-01 00:00:00.000 0 \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 1900-01-01 00:00:00.482 0 \n", + " type_id type_name index \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 35 Starting XI 1 \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf 35 Starting XI 2 \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 18 Half Start 3 \n", + "442128f8-2e38-491c-bf1e-b336e91757fa 18 Half Start 4 \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 30 Pass 5 \n", "\n", - " second possession ... \\\n", - "game_id event_id ... \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 0 1 ... \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf 0 1 ... \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 0 1 ... \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa 0 1 ... \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 0 2 ... \n", + " timestamp minute second \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e 0 days 00:00:00 0 0 \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf 0 days 00:00:00 0 0 \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 0 days 00:00:00 0 0 \n", + "442128f8-2e38-491c-bf1e-b336e91757fa 0 days 00:00:00 0 0 \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 0 days 00:00:00.482000 0 0 \n", "\n", - " team_name duration \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e Hoffenheim 0.000000 \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf Schalke 04 0.000000 \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 Hoffenheim 0.000000 \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa Schalke 04 0.000000 \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 Hoffenheim 0.453238 \n", + " ... team_name duration \\\n", + "event_id ... \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e ... Hoffenheim 0.000000 \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf ... Schalke 04 0.000000 \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 ... Hoffenheim 0.000000 \n", + "442128f8-2e38-491c-bf1e-b336e91757fa ... Schalke 04 0.000000 \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 ... Hoffenheim 0.453238 \n", "\n", - " extra \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e {'tactics': {'formation': 3421, 'lineup': [{'p... \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf {'tactics': {'formation': 4141, 'lineup': [{'p... \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 {} \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa {} \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 {'pass': {'recipient': {'id': 5460, 'name': 'A... \n", + " extra \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e {'tactics': {'formation': 3421, 'lineup': [{'p... \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf {'tactics': {'formation': 4141, 'lineup': [{'p... \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 {} \n", + "442128f8-2e38-491c-bf1e-b336e91757fa {} \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 {'pass': {'recipient': {'id': 5460, 'name': 'A... \n", "\n", - " related_events \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e [] \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf [] \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 [442128f8-2e38-491c-bf1e-b336e91757fa] \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa [b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01] \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 [7602c8d9-d988-4eae-bb9f-309fbad4c7c5] \n", + " related_events \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e [] \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf [] \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 [442128f8-2e38-491c-bf1e-b336e91757fa] \n", + "442128f8-2e38-491c-bf1e-b336e91757fa [b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01] \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 [7602c8d9-d988-4eae-bb9f-309fbad4c7c5] \n", "\n", - " player_name position_id \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e NaN NaN \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf NaN NaN \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 NaN NaN \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa NaN NaN \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 Mark Uth 18.0 \n", + " player_name position_id \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e NaN NaN \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf NaN NaN \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 NaN NaN \n", + "442128f8-2e38-491c-bf1e-b336e91757fa NaN NaN \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 Mark Uth 18.0 \n", "\n", - " position_name \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e NaN \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf NaN \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 NaN \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa NaN \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 Right Attacking Midfield \n", + " position_name location \\\n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e NaN NaN \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf NaN NaN \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 NaN NaN \n", + "442128f8-2e38-491c-bf1e-b336e91757fa NaN NaN \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 Right Attacking Midfield [61.0, 40.1] \n", "\n", - " location under_pressure \\\n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e NaN False \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf NaN False \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 NaN False \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa NaN False \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 [61.0, 40.1] False \n", + " under_pressure counterpress \n", + "event_id \n", + "41bd60ac-9b2c-4cb8-85aa-23ae71825c1e False False \n", + "fbca533d-f3f4-4a86-b4a3-4fcae63592cf False False \n", + "b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 False False \n", + "442128f8-2e38-491c-bf1e-b336e91757fa False False \n", + "644e16d7-10ca-45f0-8128-fc0055d6f753 False False \n", "\n", - " counterpress \n", - "game_id event_id \n", - "3890561 41bd60ac-9b2c-4cb8-85aa-23ae71825c1e False \n", - " fbca533d-f3f4-4a86-b4a3-4fcae63592cf False \n", - " b15ba6b1-61ac-4d9c-b2a3-096ce31bcf01 False \n", - " 442128f8-2e38-491c-bf1e-b336e91757fa False \n", - " 644e16d7-10ca-45f0-8128-fc0055d6f753 False \n", - "\n", - "[5 rows x 24 columns]" + "[5 rows x 25 columns]" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1043,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1067,7 +1049,7 @@ " \n", " \n", " \n", - " \n", + " game_id\n", " original_event_id\n", " period_id\n", " time_seconds\n", @@ -1082,7 +1064,6 @@ " bodypart_id\n", " \n", " \n", - " game_id\n", " action_id\n", " \n", " \n", @@ -1096,15 +1077,16 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", - " 3890561\n", " 0\n", + " 3890561\n", " 644e16d7-10ca-45f0-8128-fc0055d6f753\n", " 1\n", - " 0.0\n", + " 0.482\n", " 175\n", " 8387.0\n", " 53.33125\n", @@ -1117,9 +1099,10 @@ " \n", " \n", " 1\n", + " 3890561\n", " 329a1879-2521-4614-8c68-b4798b0e5d23\n", " 1\n", - " 0.0\n", + " 0.935\n", " 175\n", " 5460.0\n", " 52.63125\n", @@ -1132,9 +1115,10 @@ " \n", " \n", " 2\n", + " 3890561\n", " 77e2ddaf-6de3-49e7-a318-7d765799b543\n", " 1\n", - " 1.0\n", + " 1.015\n", " 175\n", " 5460.0\n", " 51.93125\n", @@ -1147,9 +1131,10 @@ " \n", " \n", " 3\n", + " 3890561\n", " 1b91a029-f722-4b0d-b9d5-53cdc776f9e3\n", " 1\n", - " 2.0\n", + " 2.167\n", " 175\n", " 6039.0\n", " 47.11875\n", @@ -1162,9 +1147,10 @@ " \n", " \n", " 4\n", + " 3890561\n", " 2c51f271-c812-45af-896b-06f49a14a5bb\n", " 1\n", - " 2.0\n", + " 2.954\n", " 175\n", " 6039.0\n", " 45.71875\n", @@ -1180,32 +1166,32 @@ "" ], "text/plain": [ - " original_event_id period_id \\\n", - "game_id action_id \n", - "3890561 0 644e16d7-10ca-45f0-8128-fc0055d6f753 1 \n", - " 1 329a1879-2521-4614-8c68-b4798b0e5d23 1 \n", - " 2 77e2ddaf-6de3-49e7-a318-7d765799b543 1 \n", - " 3 1b91a029-f722-4b0d-b9d5-53cdc776f9e3 1 \n", - " 4 2c51f271-c812-45af-896b-06f49a14a5bb 1 \n", + " game_id original_event_id period_id \\\n", + "action_id \n", + "0 3890561 644e16d7-10ca-45f0-8128-fc0055d6f753 1 \n", + "1 3890561 329a1879-2521-4614-8c68-b4798b0e5d23 1 \n", + "2 3890561 77e2ddaf-6de3-49e7-a318-7d765799b543 1 \n", + "3 3890561 1b91a029-f722-4b0d-b9d5-53cdc776f9e3 1 \n", + "4 3890561 2c51f271-c812-45af-896b-06f49a14a5bb 1 \n", "\n", - " time_seconds team_id player_id start_x start_y \\\n", - "game_id action_id \n", - "3890561 0 0.0 175 8387.0 53.33125 33.9575 \n", - " 1 0.0 175 5460.0 52.63125 35.8275 \n", - " 2 1.0 175 5460.0 51.93125 35.4875 \n", - " 3 2.0 175 6039.0 47.11875 32.2575 \n", - " 4 2.0 175 6039.0 45.71875 29.6225 \n", + " time_seconds team_id player_id start_x start_y end_x \\\n", + "action_id \n", + "0 0.482 175 8387.0 53.33125 33.9575 52.63125 \n", + "1 0.935 175 5460.0 52.63125 35.8275 51.93125 \n", + "2 1.015 175 5460.0 51.93125 35.4875 47.11875 \n", + "3 2.167 175 6039.0 47.11875 32.2575 45.71875 \n", + "4 2.954 175 6039.0 45.71875 29.6225 29.96875 \n", "\n", - " end_x end_y type_id result_id bodypart_id \n", - "game_id action_id \n", - "3890561 0 52.63125 35.8275 0 1 4 \n", - " 1 51.93125 35.4875 21 1 0 \n", - " 2 47.11875 32.2575 0 1 4 \n", - " 3 45.71875 29.6225 21 1 0 \n", - " 4 29.96875 15.3425 0 1 5 " + " end_y type_id result_id bodypart_id \n", + "action_id \n", + "0 35.8275 0 1 4 \n", + "1 35.4875 21 1 0 \n", + "2 32.2575 0 1 4 \n", + "3 29.6225 21 1 0 \n", + "4 15.3425 0 1 5 " ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1216,7 +1202,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "tags": [] }, @@ -1224,18 +1210,11 @@ "source": [ "dataset.close()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "/home/pieterr/Jupiter/Projects/soccer_xg", + "display_name": "soccer_xg", "language": "python", "name": "soccer_xg" }, diff --git a/notebooks/1-load-and-convert-wyscout-data.ipynb b/notebooks/1-load-and-convert-wyscout-data.ipynb index daaaf9f..0e7b2b2 100644 --- a/notebooks/1-load-and-convert-wyscout-data.ipynb +++ b/notebooks/1-load-and-convert-wyscout-data.ipynb @@ -291,21 +291,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 327/380 [05:01<00:50, 1.06it/s]/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 90th minute of game 2576016, but could not be found on the bench.\n", + "Loading game data...: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 327/380 [03:54<00:41, 1.29it/s]/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 90th minute of game 2576016, but could not be found on the bench.\n", " warnings.warn(\n", - "Loading game data...: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 373/380 [05:42<00:05, 1.20it/s]/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 56th minute of game 2575965, but could not be found on the bench.\n", + "Loading game data...: 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 373/380 [04:25<00:04, 1.59it/s]/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 56th minute of game 2575965, but could not be found on the bench.\n", " warnings.warn(\n", "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 62th minute of game 2575965, but could not be found on the bench.\n", " warnings.warn(\n", "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 88th minute of game 2575965, but could not be found on the bench.\n", " warnings.warn(\n", - "Loading game data...: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 377/380 [05:46<00:02, 1.03it/s]/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 74th minute of game 2575959, but could not be found on the bench.\n", + "Loading game data...: 99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 377/380 [04:28<00:02, 1.31it/s]/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 74th minute of game 2575959, but could not be found on the bench.\n", " warnings.warn(\n", "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 81th minute of game 2575959, but could not be found on the bench.\n", " warnings.warn(\n", "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/soccer_xg/.venv/lib/python3.11/site-packages/socceraction/data/wyscout/loader.py:281: UserWarning: A player with ID=0 was substituted in the 84th minute of game 2575959, but could not be found on the bench.\n", " warnings.warn(\n", - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [05:48<00:00, 1.09it/s]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [04:29<00:00, 1.41it/s]\n" ] }, { @@ -319,7 +319,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [05:41<00:00, 1.11it/s]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [04:22<00:00, 1.44it/s]\n" ] }, { @@ -333,7 +333,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [05:47<00:00, 1.09it/s]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [04:30<00:00, 1.41it/s]\n" ] }, { @@ -347,7 +347,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [05:47<00:00, 1.09it/s]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 380/380 [04:21<00:00, 1.45it/s]\n" ] }, { @@ -361,7 +361,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [04:41<00:00, 1.09it/s]\n" + "Loading game data...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [03:26<00:00, 1.48it/s]\n" ] } ], @@ -372,7 +372,7 @@ " mode=\"w\"\n", ")\n", "for comp in comps:\n", - " # map wyscout IDs to internal IDs\n", + " # get name and id of competition\n", " competition_name, competition_id = comp['league']['name'], comp['league']['wy_id']\n", " season_name, season_id = comp['season']['name'], comp['season']['wy_id']\n", " print(f\"Importing {competition_name} {season_name} ...\")\n", @@ -635,47 +635,47 @@ " \n", " \n", " \n", - " 99430\n", - " 3158\n", - " Łukasz Skorupski\n", - " Ł. Skorupski\n", + " 21384\n", + " 3162\n", + " Ciro Immobile\n", + " C. Immobile\n", " \n", " \n", - " 22053\n", - " 3172\n", - " Francesco Rossi\n", - " F. Rossi\n", + " 20550\n", + " 3162\n", + " Ştefan Daniel Radu\n", + " Ş. Radu\n", " \n", " \n", - " 485464\n", - " 3219\n", - " Oliva Christian Rutjens\n", - " Christian Rutjens\n", + " 130\n", + " 3162\n", + " Stefan de Vrij\n", + " S. de Vrij\n", " \n", " \n", - " 354470\n", - " 3157\n", - " Emanuele Torrasi\n", - " E. Torrasi\n", + " 346908\n", + " 3162\n", + " Alessandro Murgia\n", + " A. Murgia\n", " \n", " \n", - " 343915\n", - " 3204\n", - " Gabriele Marchegiani\n", - " G. Marchegiani\n", + " 376362\n", + " 3162\n", + " Luiz Felipe Ramos Marchi\n", + " Luiz Felipe\n", " \n", " \n", "\n", "" ], "text/plain": [ - " team_id player_name nickname\n", - "player_id \n", - "99430 3158 Łukasz Skorupski Ł. Skorupski\n", - "22053 3172 Francesco Rossi F. Rossi\n", - "485464 3219 Oliva Christian Rutjens Christian Rutjens\n", - "354470 3157 Emanuele Torrasi E. Torrasi\n", - "343915 3204 Gabriele Marchegiani G. Marchegiani" + " team_id player_name nickname\n", + "player_id \n", + "21384 3162 Ciro Immobile C. Immobile\n", + "20550 3162 Ştefan Daniel Radu Ş. Radu\n", + "130 3162 Stefan de Vrij S. de Vrij\n", + "346908 3162 Alessandro Murgia A. Murgia\n", + "376362 3162 Luiz Felipe Ramos Marchi Luiz Felipe" ] }, "execution_count": 11, @@ -690,7 +690,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "data": { @@ -713,7 +715,170 @@ " \n", " \n", " \n", + " game_id\n", + " period_id\n", + " milliseconds\n", + " team_id\n", + " player_id\n", + " type_id\n", + " type_name\n", + " subtype_id\n", + " subtype_name\n", + " positions\n", + " tags\n", + " \n", + " \n", + " event_id\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 253668302\n", + " 2576335\n", + " 1\n", + " 2417.590\n", + " 3161\n", + " 3344\n", + " 8\n", + " Pass\n", + " 85\n", + " Simple pass\n", + " [{'y': 50, 'x': 49}, {'y': 58, 'x': 38}]\n", + " [{'id': 1801}]\n", + " \n", + " \n", + " 253668303\n", + " 2576335\n", + " 1\n", + " 3904.412\n", + " 3161\n", + " 116349\n", + " 8\n", + " Pass\n", + " 85\n", + " Simple pass\n", + " [{'y': 58, 'x': 38}, {'y': 91, 'x': 37}]\n", + " [{'id': 1801}]\n", + " \n", + " \n", + " 253668304\n", + " 2576335\n", + " 1\n", + " 6484.211\n", + " 3161\n", + " 135903\n", + " 8\n", + " Pass\n", + " 85\n", + " Simple pass\n", + " [{'y': 91, 'x': 37}, {'y': 72, 'x': 34}]\n", + " [{'id': 1801}]\n", + " \n", + " \n", + " 253668306\n", + " 2576335\n", + " 1\n", + " 10043.835\n", + " 3161\n", + " 138408\n", + " 8\n", + " Pass\n", + " 85\n", + " Simple pass\n", + " [{'y': 72, 'x': 34}, {'y': 14, 'x': 36}]\n", + " [{'id': 1801}]\n", + " \n", + " \n", + " 253668308\n", + " 2576335\n", + " 1\n", + " 14032.070\n", + " 3161\n", + " 21094\n", + " 8\n", + " Pass\n", + " 85\n", + " Simple pass\n", + " [{'y': 14, 'x': 36}, {'y': 39, 'x': 30}]\n", + " [{'id': 1801}]\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " game_id period_id milliseconds team_id player_id type_id \\\n", + "event_id \n", + "253668302 2576335 1 2417.590 3161 3344 8 \n", + "253668303 2576335 1 3904.412 3161 116349 8 \n", + "253668304 2576335 1 6484.211 3161 135903 8 \n", + "253668306 2576335 1 10043.835 3161 138408 8 \n", + "253668308 2576335 1 14032.070 3161 21094 8 \n", + "\n", + " type_name subtype_id subtype_name \\\n", + "event_id \n", + "253668302 Pass 85 Simple pass \n", + "253668303 Pass 85 Simple pass \n", + "253668304 Pass 85 Simple pass \n", + "253668306 Pass 85 Simple pass \n", + "253668308 Pass 85 Simple pass \n", + "\n", + " positions tags \n", + "event_id \n", + "253668302 [{'y': 50, 'x': 49}, {'y': 58, 'x': 38}] [{'id': 1801}] \n", + "253668303 [{'y': 58, 'x': 38}, {'y': 91, 'x': 37}] [{'id': 1801}] \n", + "253668304 [{'y': 91, 'x': 37}, {'y': 72, 'x': 34}] [{'id': 1801}] \n", + "253668306 [{'y': 72, 'x': 34}, {'y': 14, 'x': 36}] [{'id': 1801}] \n", + "253668308 [{'y': 14, 'x': 36}, {'y': 39, 'x': 30}] [{'id': 1801}] " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.events(game_id=2576335).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", " \n", + " \n", " \n", " \n", " \n", @@ -728,7 +893,6 @@ " \n", " \n", " \n", - " \n", " \n", " \n", " \n", @@ -742,12 +906,13 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", " \n", + " \n", " \n", " \n", " \n", @@ -763,6 +928,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -778,6 +944,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -793,6 +960,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -808,6 +976,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -826,32 +995,32 @@ "" ], "text/plain": [ - " period_id time_seconds team_id player_id start_x \\\n", - "game_id action_id \n", - "2576335 0 1 2.417590 3161 3344 53.55 \n", - " 1 1 3.904412 3161 116349 65.10 \n", - " 2 1 6.484211 3161 135903 66.15 \n", - " 3 1 10.043835 3161 138408 69.30 \n", - " 4 1 14.032070 3161 21094 67.20 \n", + " game_id period_id time_seconds team_id player_id start_x \\\n", + "action_id \n", + "0 2576335 1 2.417590 3161 3344 53.55 \n", + "1 2576335 1 3.904412 3161 116349 65.10 \n", + "2 2576335 1 6.484211 3161 135903 66.15 \n", + "3 2576335 1 10.043835 3161 138408 69.30 \n", + "4 2576335 1 14.032070 3161 21094 67.20 \n", "\n", - " start_y end_x end_y original_event_id bodypart_id \\\n", - "game_id action_id \n", - "2576335 0 34.00 65.10 39.44 253668302 0 \n", - " 1 39.44 66.15 61.88 253668303 0 \n", - " 2 61.88 69.30 48.96 253668304 0 \n", - " 3 48.96 67.20 9.52 253668306 0 \n", - " 4 9.52 73.50 26.52 253668308 0 \n", + " start_y end_x end_y original_event_id bodypart_id type_id \\\n", + "action_id \n", + "0 34.00 65.10 39.44 253668302 0 0 \n", + "1 39.44 66.15 61.88 253668303 0 0 \n", + "2 61.88 69.30 48.96 253668304 0 0 \n", + "3 48.96 67.20 9.52 253668306 0 0 \n", + "4 9.52 73.50 26.52 253668308 0 0 \n", "\n", - " type_id result_id \n", - "game_id action_id \n", - "2576335 0 0 1 \n", - " 1 0 1 \n", - " 2 0 1 \n", - " 3 0 1 \n", - " 4 0 1 " + " result_id \n", + "action_id \n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 1 \n", + "4 1 " ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -862,7 +1031,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "tags": [] }, @@ -870,18 +1039,11 @@ "source": [ "dataset.close()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "/home/pieterr/Jupiter/Projects/soccer_xg", + "display_name": "soccer_xg", "language": "python", "name": "soccer_xg" },
game_idperiod_idtime_secondsteam_idresult_id
game_idaction_id
25763350257633512.4175903161
1257633513.9044123161
2257633516.4842113161
32576335110.0438353161
42576335114.0320703161