-
Notifications
You must be signed in to change notification settings - Fork 0
/
playstyle_wheels.R
341 lines (259 loc) · 15.2 KB
/
playstyle_wheels.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# load libraries
library(worldfootballR)
library(tidyverse)
library(extrafont)
library(shiny)
library(geomtextpath)
library(readr)
# leagues of interest
list_of_leagues <- c('ENG', 'ITA', 'NED', 'SPA', 'POR', 'GER', 'FRA','BRA','ARG', 'BEL', 'MEX', 'USA')
# 2. no scandinavian data
# 3. no Russia, Turkey, Ukraine, Switzerland, data
## function to get the data for 7 leagues in one season and calculate all the metrics
get_leagues <- function(season, list_of_leagues){
# standard stats
d <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('standard'))
# get the possession data
d_possession <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('possession'))
# get the miscellaneous data
d_misc <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('misc'))
# get the defensive action data
d_defense <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('defense'))
# get the passing type data
d_passing_types <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('passing_types'))
# get the passing data
d_passing <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('passing'))
# get the advanced goalkeeping data
d_keeper_adv <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('keeper_adv'))
# get the shooting data
d_shooting <- fb_season_team_stats(country = list_of_leagues,
gender = "M",
season_end_year = season,
tier = "1st",
stat_type = c('shooting'))
## Create new variables that are required to calculate metrics for the plot
# create new variable that adds middle and attacking third touches to the main data frame
d$mid_att_touches_opponent <- d_possession$`Mid 3rd_Touches` + d_possession$`Att 3rd_Touches`
# create opponent offside to the main data frame
d$offsides_opponent <- d_misc$Off
# create opponent through balls to the main data frame
d$through_balls_opponent <- d_passing_types$TB_Pass_Types
# create defensive actions of goalkeepers outside of the box
d$goalkeeper_outBox <- d_keeper_adv$`#OPA_Sweeper`
# create passes into final third made by opponent
d$final_third_passes_opponent <- d_passing$Final_Third
# create goalkeeper passes longer than 40 yards (it's measured in %)
d$launch <- d_keeper_adv$Att_Launched/ (d_keeper_adv$Att_Goal_Kicks + d_keeper_adv$`Att (GK)_Passes`)
# create press resistance variable
d$tackles_def_mid_opponent <- d_defense$`Def 3rd_Tackles` + d_defense$`Mid 3rd_Tackles`
d$touches_def_mid <- d_possession$`Def 3rd_Touches` + d_possession$`Mid 3rd_Touches`
# create central progression variable
d$central_progression <- (d_passing_types$Crs_Pass_Types/ d_passing_types$Live_Pass_Types) * 100
# create circulate variable
d$circulate <- (d_possession$PrgDist_Carries + d_passing$PrgDist_Total) / d_passing$TotDist_Total
# create variable touches in final third
d$touches_final <- d_possession$`Att 3rd_Touches`
# create variable for patient attack
d$patient_attack <- (d_shooting$Sh_Standard / d_possession$`Att 3rd_Touches`) * 100
# create variable for shot quality
d$shot_quality <- d_shooting$xG_Expected / d_shooting$Sh_Standard
# subset stats for each team
d_for <- d %>%
subset(Team_or_Opponent == 'team')
# subset stats against each team
d_against <- d %>%
subset(Team_or_Opponent == 'opponent')
## Create new variables that are required to calculate metrics for the plot (split for/against required)
# calculate a variable 'High Line'. it's a sum of conceded offsides, throught balls and goalkeeper actions outside of the box devided by all opponent passes into the final third
d_against$high_line <- (d_against$offsides_opponent +
d_against$through_balls_opponent +
d_for$goalkeeper_outBox) / d_against$final_third_passes_opponent
# append press resistance variable
d_for$press_resistance <- d_for$touches_def_mid / d_against$tackles_def_mid_opponent
# create variable field tilt
d_for$field_tilt <- (d_for$touches_final / (d_for$touches_final + d_against$touches_final)) * 100
### Defense
## Chance prevention
# Calculate the percentile ranks (inverted for reverse variable)
ecdf_chance_prevention <- ecdf(d_against$npxG_Expected) # Create the ECDF based on npxG
d_against$chance_prevention <- sapply(d_against$npxG_Expected, function(x) (1 - ecdf_chance_prevention(x)) * 100) # Apply ECDF to each X value to get percentiles
## Intensity
ecdf_intensity <- ecdf(d_against$mid_att_touches_opponent) # Create the ECDF based on npxG
d_against$intensity <- sapply(d_against$mid_att_touches_opponent, function(x) (1 - ecdf_intensity(x)) * 100) # Apply ECDF to each X value to get percentiles
## High Line
# Calucalte the percentile ranks
ecdf_high_line <- ecdf(d_against$high_line) # Create the ECDF based on npxG
d_against$high_line_percentile <- sapply(d_against$high_line, function(x) ecdf_high_line(x) * 100) # Apply ECDF to each X value to get percentiles
### Possession
## Deep build-up
ecdf_deep_buildup <- ecdf(d_for$launch) # Create the ECDF based on npxG
d_for$deep_buildup <- sapply(d_for$launch, function(x) (1 - ecdf_deep_buildup(x)) * 100) # Apply ECDF to each X value to get percentiles
## Press resistance
ecdf_press_resistance <- ecdf(d_for$press_resistance) # Create the ECDF based on npxG
d_for$press_resistance_percentile <- sapply(d_for$press_resistance, function(x) ecdf_press_resistance(x) * 100) # Apply ECDF to each X value to get percentiles
## Possession
ecdf_possession <- ecdf(d_for$Poss) # Create the ECDF based on npxG
d_for$possession <- sapply(d_for$Poss, function(x) ecdf_possession(x) * 100) # Apply ECDF to each X value to get percentiles
### Progression
## Central progression
ecdf_central_progression <- ecdf(d_for$central_progression) # Create the ECDF based on npxG
d_for$central_progression_percentile <- sapply(d_for$central_progression, function(x) (1 - ecdf_central_progression(x)) * 100) # Apply ECDF to each X value to get percentiles
## Circulate
ecdf_circulate <- ecdf(d_for$circulate) # Create the ECDF based on npxG
d_for$circulate_percentile <- sapply(d_for$circulate, function(x) (1 - ecdf_circulate(x)) * 100) # Apply ECDF to each X value to get percentiles
## Field tilt
ecdf_field_tilt <- ecdf(d_for$field_tilt) # Create the ECDF based on npxG
d_for$field_tilt_percentile <- sapply(d_for$field_tilt, function(x) ecdf_field_tilt(x) * 100) # Apply ECDF to each X value to get percentiles
### Attack
## Chance creation
ecdf_chance_creation <- ecdf(d_for$npxG_Per_Minutes) # Create the ECDF based on npxG
d_for$chance_creation <- sapply(d_for$npxG_Per_Minutes, function(x) ecdf_chance_creation(x) * 100) # Apply ECDF to each X value to get percentiles
## Patient attack
ecdf_patient_attack <- ecdf(d_for$patient_attack) # Create the ECDF based on npxG
d_for$patient_attack_percentile <- sapply(d_for$patient_attack, function(x) (1 - ecdf_patient_attack(x)) * 100) # Apply ECDF to each X value to get percentiles
## Shot Quality
ecdf_shot_quality <- ecdf(d_for$shot_quality) # Create the ECDF based on npxG
d_for$shot_quality_percentile <- sapply(d_for$shot_quality, function(x) ecdf_shot_quality(x) * 100) # Apply ECDF to each X value to get percentiles
#### Select only relevant variables for plotting
d_playstyle_for <- d_for %>%
select(Competition_Name, Country, Season_End_Year, Squad, Age, deep_buildup, press_resistance_percentile,
possession, central_progression_percentile, circulate_percentile, field_tilt_percentile,
chance_creation, patient_attack_percentile, shot_quality_percentile)
d_playstyle_against <- d_against %>%
select(Competition_Name, Country, Season_End_Year, Squad, Age, chance_prevention, intensity,
high_line_percentile)
d_playstyle <- cbind(d_playstyle_for, d_playstyle_against$chance_prevention, d_playstyle_against$intensity,
d_playstyle_against$high_line_percentile)
# order variables
d_playstyle <- d_playstyle[c("Competition_Name", "Country", "Season_End_Year", "Squad", "Age",
"d_playstyle_against$chance_prevention", "d_playstyle_against$intensity",
"d_playstyle_against$high_line_percentile", "deep_buildup",
"press_resistance_percentile", "possession", "central_progression_percentile",
"circulate_percentile", "field_tilt_percentile", "chance_creation",
"patient_attack_percentile", "shot_quality_percentile")]
names(d_playstyle) <- c("Competition_Name", "Country", "Season_End_Year", "Squad", "Age", 'Chance_Prevention',
'Intensity', 'High_Line', 'Deep_buildup', 'Press_Resistance', 'Possession', 'Central_Progression',
'Circulation', 'Field_Tilt', 'Chance_Creation', 'Patient_Attack', 'Shot_Quality')
#change data to long
d_playstale_long <- pivot_longer(
d_playstyle,
cols = names(d_playstyle)[6:17], # Selects all columns that start with 'Var'
names_to = "Metric", # New column for variable names
values_to = "Value" # New column for variable values
)
# change metric to factor
d_playstale_long$Metric <- factor(d_playstale_long$Metric)
# add a variable to separate into four categories of different aspects of the game
d_playstale_long <- d_playstale_long %>%
mutate(Category = factor(case_when(
Metric %in% c('Chance_Prevention','Intensity', 'High_Line') ~ 'Defensive',
Metric %in% c('Deep_buildup', 'Press_Resistance', 'Possession') ~ 'Possesion',
Metric %in% c('Central_Progression','Circulation', 'Field_Tilt') ~ 'Progression',
Metric %in% c('Chance_Creation', 'Patient_Attack', 'Shot_Quality') ~ 'Attacking',
TRUE ~ 'None'
)))
# reorder levels of metrics so that they are separated into four quadrants
d_playstale_long$Metric <- factor(d_playstale_long$Metric, levels = c('Chance_Prevention','Intensity', 'High_Line',
'Deep_buildup', 'Press_Resistance', 'Possession',
'Central_Progression','Circulation', 'Field_Tilt',
'Chance_Creation', 'Patient_Attack', 'Shot_Quality'))
return(d_playstale_long)
}
seasons_to_extract <- 2019:2024
d_full <- data.frame()
for (season in seasons_to_extract){
d_tmp <- get_leagues(season, list_of_leagues)
d_full<-rbind(d_full, d_tmp)
}
#change year to string for r shiny app selection
d_full$Season_End_Year <- factor(d_full$Season_End_Year)
### radar plot with ggplot2 - example
team_to_plot <- d_full[d_full$Squad == 'Brighton' & d_full$Season_End_Year == '2024' ,]
plot_playstyle_wheel <- ggplot(team_to_plot) +
# Make custom panel grid
geom_col(aes(x = Metric, y = Value, fill = Category), position = "dodge2", show.legend = TRUE, alpha = .9) +
geom_vline(xintercept = 1:13 - 0.5, color = "gray90", alpha = 0.5) +
geom_hline(yintercept =c(0,25,50,75,100) , color = "gray90", alpha = 0.5) +
# Make it circular!
coord_curvedpolar() +
geom_text(aes(x = Metric, y = Value + 5, label = round(Value)), color = "black", fontface = 'bold') +
scale_x_discrete(labels = c('Chance Prevention','Intensity', 'High Line', 'Deep buildup', 'Press Resistance',
'Possession', 'Central Progression','Circulation', 'Field Tilt','Chance Creation',
'Patient Attack', 'Shot Quality')) +
scale_fill_manual(values = c("#007D8C", "#FF6F61", "#FFD662", "#708090")) +
theme(text = element_text(family = "Source Sans Pro", face = 'bold', size = 12),
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "bottom")
plot_playstyle_wheel
#save the data to csv
write_csv(d_full, 'data_playstyle_wheel.csv')
### make point-line plots for each metric over all available seasons
data_playstyle_wheel <- read_csv("data/data_playstyle_wheel.csv")
# # change to wide
# d_full <- data_playstyle_wheel %>%
# select(-Category) %>%
# pivot_wider( names_from = Metric, values_from = Value)
### radar plot with ggplot2 - example
# order factors for the plots
data_playstyle_wheel$Metric <- factor(data_playstyle_wheel$Metric, levels = c('Chance_Prevention','Intensity', 'High_Line',
'Deep_buildup', 'Press_Resistance', 'Possession',
'Central_Progression','Circulation', 'Field_Tilt',
'Chance_Creation', 'Patient_Attack', 'Shot_Quality'))
team_to_plot <- data_playstyle_wheel[data_playstyle_wheel$Squad == 'Brighton' ,]
plot_playstyle_wheel_over_seasons <- ggplot(team_to_plot) +
geom_point(aes(x = Season_End_Year, y = Value, group = Metric)) +
geom_line(aes(x = Season_End_Year, y = Value, group = Metric)) +
facet_wrap(~ Metric, ncol = 3, nrow = 4) +
theme_bw() +
scale_y_continuous(breaks = seq(0,100,20)) +
theme(
text = element_text(family = "Source Sans Pro", face = 'bold'),
panel.grid.major = element_blank(), # Remove major grid lines
panel.grid.minor = element_blank(), # Remove minor grid lines
panel.background = element_rect(fill = "white"), # Ensure background is white
panel.border = element_blank(),
panel.grid.major.y = element_line(color = "grey", size = 0.5), # Add major y grid lines
panel.grid.major.x = element_blank(), # Remove major x grid lines
strip.background = element_blank(), # Remove the gray box around facet titles
strip.text = element_text(size = 12, face = "bold"), # Customize facet title text
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
axis.ticks.x = element_line()
) +
labs(title = "Metrics Over Seasons",
x = "Season",
y = "Value")
plot_playstyle_wheel_over_seasons