Rename add_pairwise_comparison() to add_relative_skill()
nikosbosse committed Mar 20, 2024
1 parent a3608d4 commit 0eeaec2
Showing 8 changed files with 36 additions and 36 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
@@ -23,7 +23,7 @@ S3method(validate_forecast,forecast_binary)
S3method(validate_forecast,forecast_point)
S3method(validate_forecast,forecast_quantile)
S3method(validate_forecast,forecast_sample)
export(add_pairwise_comparison)
export(add_relative_skill)
export(ae_median_quantile)
export(ae_median_sample)
export(as_forecast)
2 changes: 1 addition & 1 deletion NEWS.md
@@ -26,7 +26,7 @@ The update introduces breaking changes. If you want to keep using the older vers
- `score()` now returns objects of class `scores` with a stored attribute `metrics` that holds the names of the scoring rules that were used. Users can call `get_metrics()` to access the names of those scoring rules.
- `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
- Users can test whether an object is of class `forecast_*()` using the function `is_forecast()`. Users can also test for a specific `forecast_*` class using the appropriate `is_forecast.forecast_*` method. For example, to check whether an object is of class `forecast_quantile`, you would use `scoringutils:::is_forecast.forecast_quantile()`.
- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds columns with relative skil scores and scaled relative skill scores.
- The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_relative_skill()`, was introduced that takes summarised scores as an input and adds columns with relative skill scores and scaled relative skill scores.
- `add_coverage()` was replaced by a new function, `get_coverage()`. This function comes with an updated workflow where coverage values are computed directly based on the original data and can then be visualised using `plot_interval_coverage()` or `plot_quantile_coverage()`. An example workflow would be `example_quantile |> as_forecast() |> get_coverage(by = "model") |> plot_interval_coverage()`.
- Support for the interval format was mostly dropped (see PR #525 by @nikosbosse and reviewed by @seabbs)
- The function `bias_range()` was removed (users should now use `bias_quantile()` instead)
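For orientation, here is a minimal sketch of the workflow these NEWS entries describe, written against the development version of scoringutils and its bundled `example_quantile` data (both assumed); it shows the renamed `add_relative_skill()` alongside the new `as_forecast()`, `get_metrics()` and `get_coverage()` helpers and is not part of this commit's diff.

```r
# Sketch of the revised workflow (assumes the scoringutils development
# version and its bundled example_quantile data; not part of this commit).
library(scoringutils)

forecast <- as_forecast(example_quantile)  # construct and validate a forecast object
is_forecast(forecast)                      # TRUE: the object has a forecast_* class

scores <- score(forecast)                  # returns an object of class `scores`
get_metrics(scores)                        # names of the scoring rules that were used

scores |>
  add_relative_skill(
    by = c("model", "target_type"),
    baseline = "EuroCOVIDhub-ensemble"     # adds relative and scaled relative skill columns
  ) |>
  summarise_scores(by = c("model", "target_type"))

# Coverage is now computed directly from the validated data, not from scores:
forecast |>
  get_coverage(by = "model") |>
  plot_interval_coverage()
```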
2 changes: 1 addition & 1 deletion R/pairwise-comparisons.R
@@ -515,7 +515,7 @@ permutation_test <- function(scores1,
#' @inheritParams pairwise_comparison
#' @export
#' @keywords keyword scoring
add_pairwise_comparison <- function(
add_relative_skill <- function(
scores,
by = "model",
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
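As a brief, hedged illustration of the signature above: the `by` argument defaults to `"model"`, and `metric` defaults to whichever of `wis`, `crps` or `brier_score` is present in the scores. The example data is again assumed from the package; this is a sketch rather than part of the changed file.

```r
# Sketch only: with quantile-based scores the default metric resolves to "wis",
# because metric = intersect(c("wis", "crps", "brier_score"), names(scores)).
library(scoringutils)

scores <- score(as_forecast(example_quantile))

add_relative_skill(scores)                 # defaults: by = "model", metric = "wis"

# Spelled out, mirroring the old add_pairwise_comparison() call:
add_relative_skill(
  scores,
  by = "model",
  metric = "wis",
  baseline = "EuroCOVIDhub-ensemble"       # optional baseline for scaled relative skill
)
```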
2 changes: 1 addition & 1 deletion README.Rmd
@@ -108,7 +108,7 @@ example_quantile %>%
"location", "target_end_date", "target_type", "horizon", "model"
)) %>%
score() %>%
add_pairwise_comparison(
add_relative_skill(
by = c("model", "target_type"),
baseline = "EuroCOVIDhub-ensemble"
) %>%
2 changes: 1 addition & 1 deletion README.md
@@ -126,7 +126,7 @@ example_quantile %>%
"location", "target_end_date", "target_type", "horizon", "model"
)) %>%
score() %>%
add_pairwise_comparison(
add_relative_skill(
by = c("model", "target_type"),
baseline = "EuroCOVIDhub-ensemble"
) %>%
6 changes: 3 additions & 3 deletions man/add_pairwise_comparison.Rd

Some generated files are not rendered by default.

52 changes: 26 additions & 26 deletions tests/testthat/test-pairwise_comparison.R
@@ -65,11 +65,11 @@ test_that("pairwise_comparison() works", {
)
)
eval_without_baseline <- suppressMessages(
add_pairwise_comparison(eval_without_rel_skill)
add_relative_skill(eval_without_rel_skill)
)

eval_with_baseline <- suppressMessages(
add_pairwise_comparison(eval_without_rel_skill, baseline = "m1")
add_relative_skill(eval_without_rel_skill, baseline = "m1")
)


@@ -203,7 +203,7 @@ test_that("pairwise_comparison() works", {

eval <- score(data_formatted)
eval_summarised <- summarise_scores(eval, by = c("model", "location"))
eval_with_baseline <- add_pairwise_comparison(eval, by = c("model", "location"), baseline = "m1")
eval_with_baseline <- add_relative_skill(eval, by = c("model", "location"), baseline = "m1")
eval_with_baseline <- summarise_scores(eval_with_baseline, by = c("model", "location"))

relative_skills_with <- eval_with_baseline[
@@ -220,15 +220,15 @@ test_that("pairwise_comparison() works", {
test_that("pairwise_comparison() work in score() with integer data", {
eval <- suppressMessages(score(data = as_forecast(example_integer)))
eval_summarised <- summarise_scores(eval, by = c("model", "target_type"))
eval <- add_pairwise_comparison(eval_summarised)
eval <- add_relative_skill(eval_summarised)
expect_true("crps_relative_skill" %in% colnames(eval))
})


test_that("pairwise_comparison() work in score() with binary data", {
eval <- suppressMessages(score(data = as_forecast(example_binary)))
eval_summarised <- summarise_scores(eval, by = c("model", "target_type"))
eval <- add_pairwise_comparison(eval_summarised)
eval <- add_relative_skill(eval_summarised)
expect_true("brier_score_relative_skill" %in% colnames(eval))
})

@@ -256,15 +256,15 @@ test_that("pairwise_comparison() works", {
})


test_that("pairwise_comparison() and `add_pairwise_comparison()` give same result", {
test_that("pairwise_comparison() and `add_relative_skill()` give same result", {
eval <- scores_continuous

pairwise <- pairwise_comparison(eval,
by = "model",
metric = "crps"
)

eval2 <- add_pairwise_comparison(scores_continuous, by = "model")
eval2 <- add_relative_skill(scores_continuous, by = "model")
eval2 <- summarise_scores(eval2, by = "model")

expect_equal(
@@ -279,20 +279,20 @@ test_that("pairwise_comparison() realises when there is no baseline model", {
)
})

test_that("Basic input checks for `add_pairwise_comparison() work", {
test_that("Basic input checks for `add_relative_skill() work", {
eval <- data.table::copy(scores_continuous)

# check that model column + columns in 'by' + baseline model are present
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval, by = c("model", "missing"), metric = "crps"
),
"Not all columns specified in `by` are present:"
)

# error if baseline is not present
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval, by = "model", baseline = "missing", metric = "crps"
),
"Assertion on 'baseline' failed: Must be a subset of"
@@ -301,12 +301,12 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
# error if not enough models are present
eval_few <- eval[model %in% c("EuroCOVIDhub-ensemble", "EuroCOVIDhub-baseline")]
expect_no_error(
add_pairwise_comparison(
add_relative_skill(
eval_few, by = "model", metric = "crps"
)
)
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval_few, by = "model", baseline = "EuroCOVIDhub-baseline",
metric = "crps"
),
@@ -315,14 +315,14 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {

# error if no relative skill metric is found
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval, by = "model",
metric = "missing"
)
)
eval_nometric <- data.table::copy(eval)[, "crps" := NULL]
expect_error(
suppressWarnings(add_pairwise_comparison(
suppressWarnings(add_relative_skill(
eval_nometric, by = "model"
)),
"Assertion on 'metric' failed: Must be a subset of "
@@ -331,7 +331,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
# error if no model column is found
eval_nomodel <- data.table::copy(eval)[, "model" := NULL]
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval_nomodel, by = "target_type", metric = "crps"
),
"Assertion on 'scores' failed: Column 'model' not found in data."
@@ -341,7 +341,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
eval_noattribute <- data.table::copy(eval)
attr(eval_noattribute, "metrics") <- NULL
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval_noattribute, by = "model", metric = "crps"
),
"needs an attribute `metrics`"
@@ -351,7 +351,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
eval_nas <- data.table::copy(eval)
eval_nas[1:10, "crps" := NA]
expect_warning(
add_pairwise_comparison(
add_relative_skill(
eval_nas, by = "model", metric = "crps"
),
"Some values for the metric `crps` are NA. These have been removed."
@@ -360,7 +360,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
# warning if there are no values left after removing NAs
eval_nas[, "crps" := NA]
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval_nas, by = "model", metric = "crps"
),
"After removing \"NA\" values for `crps`, no values were left."
@@ -370,7 +370,7 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
eval_diffsign <- data.table::copy(eval)
eval_diffsign[1:10, "crps" := -eval_diffsign[1:10, "crps"]]
expect_error(
add_pairwise_comparison(
add_relative_skill(
eval_diffsign, by = "model", metric = "crps"
),
"To compute pairwise comparisons, all values of `crps` must have the same sign."
@@ -379,15 +379,15 @@ test_that("Basic input checks for `add_pairwise_comparison() work", {
# message if `by` is equal to the forecast unit
fu <- get_forecast_unit(eval)
expect_message(
add_pairwise_comparison(
add_relative_skill(
eval, by = fu, metric = "crps"),
"relative skill can only be computed if `by` is different from the unit of a single forecast."
)

# warning if by is equal to the forecast unit and also by is "model"
eval_summ <- summarise_scores(eval, by = "model")
expect_warning(
add_pairwise_comparison(
add_relative_skill(
eval_summ, by = "model", metric = "crps"
),
"`by` is set to 'model', which is also the unit of a single forecast."
@@ -448,9 +448,9 @@ test_that("compare_two_models() throws error with wrong inputs", {
)
})

test_that("add_pairwise_comparison() works with point forecasts", {
test_that("add_relative_skill() works with point forecasts", {
expect_no_condition(
pw_point <- add_pairwise_comparison(
pw_point <- add_relative_skill(
scores_point,
metric = "se_point"
)
@@ -467,8 +467,8 @@ test_that("add_pairwise_comparison() works with point forecasts", {
)
})

test_that("add_pairwise_comparison() can compute relative measures", {
scores_with <- add_pairwise_comparison(
test_that("add_relative_skill() can compute relative measures", {
scores_with <- add_relative_skill(
scores_quantile,
)
scores_with <- summarise_scores(scores_with, by = "model")
@@ -478,7 +478,7 @@ test_that("add_pairwise_comparison() can compute relative measures", {
c(1.6, 0.81, 0.75, 1.03), tolerance = 0.01
)

scores_with <- add_pairwise_comparison(
scores_with <- add_relative_skill(
scores_quantile, by = "model",
metric = "ae_median"
)
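The assertions in these tests lean on a naming convention worth spelling out: the added columns are named after the metric that was used (`crps_relative_skill`, `brier_score_relative_skill`, and so on). A small sketch under the same assumptions as before (package example data, development version); the exact name of the scaled variant is an assumption.

```r
# Sketch: the new columns follow the pattern <metric>_relative_skill; a scaled
# variant is added when a baseline is given (its exact name is an assumption).
library(scoringutils)

scores <- score(as_forecast(example_binary))
scores <- summarise_scores(scores, by = c("model", "target_type"))
scores <- add_relative_skill(scores)       # metric defaults to "brier_score" here

"brier_score_relative_skill" %in% colnames(scores)  # TRUE, as the tests assert
```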
4 changes: 2 additions & 2 deletions vignettes/scoringutils.Rmd
@@ -181,7 +181,7 @@ In order to better compare models against each other we can use relative scores

```{r}
score(as_forecast(example_quantile)) %>%
add_pairwise_comparison(
add_relative_skill(
by = c("model", "target_type"),
baseline = "EuroCOVIDhub-ensemble"
) %>%
@@ -295,7 +295,7 @@ forecast_quantile %>%
forecast_quantile %>%
score() %>%
summarise_scores(by = "model") %>%
add_pairwise_comparison(baseline = "EuroCOVIDhub-baseline")
add_relative_skill(baseline = "EuroCOVIDhub-baseline")
```

If using the `pairwise_comparison()` function, we can also visualise pairwise comparisons by showing the mean score ratios between models. By default, smaller values are better and the model we care about is shown on the y-axis on the left, while the model it is compared against is shown on the x-axis on the bottom. In the example above, the EuroCOVIDhub-ensemble performs best (it only has values smaller than 1), while the EuroCOVIDhub-baseline performs worst (and only has values larger than 1). For cases, the UMass-MechBayes model is of course excluded as there are no case forecasts available and therefore the set of overlapping forecasts is empty.
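A sketch of the comparison described in that last paragraph, calling `pairwise_comparison()` as it appears in the tests above. The output column names (for example `mean_scores_ratio`) are an assumption based on the development version, and the resulting table can then be handed to the package's pairwise plotting helper as the vignette describes.

```r
# Sketch: pairwise_comparison() returns one row per ordered pair of models,
# including the mean score ratio used for the visualisation described above.
# Column names such as mean_scores_ratio are assumptions and may differ.
library(scoringutils)

scores <- score(as_forecast(example_quantile))

pairwise <- pairwise_comparison(
  scores,
  by     = "model",
  metric = "wis"
)
head(pairwise)
```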
