Merge branch 'main' into pkgdown-update
nikosbosse authored Jan 17, 2024
2 parents 38efa94 + c523ee1 commit 1e8d12e
Showing 36 changed files with 300 additions and 261 deletions.
11 changes: 11 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  - package-ecosystem: "github-actions" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
1 change: 1 addition & 0 deletions .github/workflows/pkgdown.yaml
@@ -41,5 +41,6 @@ jobs:
         if: github.event_name != 'pull_request'
         uses: JamesIves/github-pages-deploy-action@v4
         with:
+          clean: false
           branch: gh-pages
           folder: docs
21 changes: 7 additions & 14 deletions .github/workflows/render_readme.yaml
@@ -1,10 +1,8 @@
 # Name of the workflow
 name: render-readme
 
-# Controls when the action will run. Triggers include:
-#
-# - button trigger from github action page
-# - on changes to readme.Rmd
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}
+  cancel-in-progress: true
 
 on:
   workflow_dispatch:
@@ -13,19 +11,14 @@ on:
       - 'README.Rmd'
       - DESCRIPTION
 
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
   render-readme:
     runs-on: macos-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
     steps:
       - name: Checkout repos
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Setup R
         uses: r-lib/actions/setup-r@v2
@@ -47,6 +40,6 @@ jobs:
       run: |
         git config --local user.email "[email protected]"
         git config --local user.name "GitHub Action"
-        git add README.md
-        git diff-index --quiet HEAD || git commit -m "Automatic readme update"
-        git status -sb | grep -q ahead && git push origin
+        git add README.md man/figures/
+        git diff-index --quiet HEAD || git commit -m "Automatic readme update [ci skip]"
+        git push origin || echo "No changes to push"
6 changes: 3 additions & 3 deletions NAMESPACE
@@ -30,9 +30,9 @@ export(get_duplicate_forecasts)
 export(get_forecast_counts)
 export(get_forecast_type)
 export(get_forecast_unit)
-export(interval_coverage_dev_quantile)
-export(interval_coverage_quantile)
-export(interval_coverage_sample)
+export(get_score_names)
+export(interval_coverage)
+export(interval_coverage_deviation)
 export(log_shift)
 export(logs_binary)
 export(logs_sample)
8 changes: 5 additions & 3 deletions NEWS.md
@@ -21,7 +21,7 @@ The update introduces breaking changes. If you want to keep using the older vers
   - `observed`: numeric, either a scalar or a vector
   - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector)
   - `quantile`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`.
-- Users can now supply their own scoring rules to `score()` as a list of functions. Default scoring rules can be accessed using the functions `rules_point()`, `rules_sample()`, `rules_quantile()` and `rules_binary()`, which return a list of scoring rules suitable for the respective forecast type.
+- Users can now supply their own scoring rules to `score()` as a list of functions. Default scoring rules can be accessed using the functions `rules_point()`, `rules_sample()`, `rules_quantile()` and `rules_binary()`, which return a named list of scoring rules suitable for the respective forecast type. The column names of the scores in the output of `score()` correspond to the names of the scoring rules, i.e. the names of the functions in the list. Users can call `get_score_names()` on the output of `score()` to retrieve the names of the scores used.
 - `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines the forecast type of the data, constructs a forecast object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`.
 - The functionality for computing pairwise comparisons was split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as input and adds pairwise comparisons to them.
 - `add_coverage()` was reworked completely. Its new purpose is to add coverage information to the raw forecast data (essentially fulfilling some of the functionality previously covered by `score_quantile()`).
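The new workflow described in these entries can be sketched as follows. This is a hypothetical usage example, not part of the diff; `example_quantile` (a dataset bundled with scoringutils) and the `metrics` argument name are assumptions:

```r
library(scoringutils)

# construct and validate a forecast object, then score it with the
# default quantile rules (a named list of functions)
forecast <- as_forecast(example_quantile)
scores <- score(forecast, metrics = rules_quantile())

# the names of the list entries become the score columns
get_score_names(scores)

# pairwise comparisons are now a separate step applied to summarised scores
summarised <- summarise_scores(scores, by = "model")
summarised <- add_pairwise_comparison(summarised)
```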
@@ -38,9 +38,11 @@ The update introduces breaking changes. If you want to keep using the older vers
 - Files ending in ".Rda" were renamed to ".rds" where appropriate when used together with `saveRDS()` or `readRDS()`.
 - `score()` now calls `na.omit()` on the data, instead of only removing rows with missing values in the columns `observed` and `predicted`. This is because `NA` values in other columns can also interfere with, e.g., the grouping of forecasts according to the unit of a single forecast.
 - Added documentation for the return value of `summarise_scores()`.
-- Removed `abs_error` and `squared_error` from the package in favour of `Metrics::ae` and `Metrics::se`.
+- Removed `abs_error` and `squared_error` from the package in favour of `Metrics::ae` and `Metrics::se`.
 - Renamed `interval_coverage_quantile()` and `interval_coverage_dev_quantile()` to `interval_coverage()` and `interval_coverage_deviation()`, respectively. Removed `interval_coverage_sample()` as users are now expected to convert to a quantile-based format before scoring.
 - Added unit tests for `interval_coverage()` and `interval_coverage_deviation()` to make sure the functions throw the correct warnings when insufficient quantiles are provided.
+- Documentation pkgdown pages are now created for both the stable and dev versions.
+- Output columns for pairwise comparisons have been renamed to contain the name of the metric used for comparing.
 
 # scoringutils 1.2.2
 
@@ -50,7 +52,7 @@ The update introduces breaking changes. If you want to keep using the older vers
 
 ## Bug fixes
 - Fixes a bug with `set_forecast_unit()` where the function only worked with a data.table, but not with a data.frame, as input.
-- The metrics table in the vignette [Details on the metrics implemented in `scoringutils`](https://epiforecasts.io/scoringutils/articles/metric-details.html) had duplicated entries. This was fixed by removing the duplicated rows.
+- The metrics table in the vignette [Details on the metrics implemented in `scoringutils`](https://epiforecasts.io/scoringutils/articles/metric-details.html) had duplicated entries. This was fixed by removing the duplicated rows.
 
 # scoringutils 1.2.1
 
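The interval-coverage renames mentioned in the NEWS entries can be illustrated with a small sketch (the input data here is made up; the signatures follow the definitions in `R/metrics-quantile.R` further down in this diff):

```r
library(scoringutils)

observed <- c(1, -15, 22)
predicted <- rbind(
  c(-1, 0, 1, 2, 3),
  c(-2, 1, 2, 2, 4),
  c(-2, 0, 3, 3, 4)
)
quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9)

# formerly interval_coverage_quantile(): is the observed value inside the
# central 50% interval, i.e. between the 0.25 and 0.75 quantiles?
interval_coverage(observed, predicted, quantile, range = 50)
#> [1]  TRUE FALSE FALSE

# formerly interval_coverage_dev_quantile()
interval_coverage_deviation(observed, predicted, quantile)
```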
6 changes: 3 additions & 3 deletions R/add_coverage.R
@@ -62,7 +62,7 @@ add_coverage <- function(data) {
 
   data[, range := get_range_from_quantile(quantile)]
 
-  data <- merge(interval_data, data, by = unique(c(forecast_unit, "range")))
+  data <- merge(data, interval_data, by = unique(c(forecast_unit, "range")))
   data[, interval_coverage_deviation := interval_coverage - range / 100]
   data[, quantile_coverage := observed <= predicted]
   data[, quantile_coverage_deviation := quantile_coverage - quantile]
@@ -74,8 +74,8 @@ add_coverage <- function(data) {
 
   # add coverage "metrics" to list of stored metrics
   # this makes it possible to use `summarise_scores()` later on
-  stored_attributes[["metric_names"]] <- c(
-    stored_attributes[["metric_names"]],
+  stored_attributes[["score_names"]] <- c(
+    stored_attributes[["score_names"]],
     new_metrics
   )
   data <- assign_attributes(data, stored_attributes)
7 changes: 3 additions & 4 deletions R/convenience-functions.R
@@ -108,7 +108,7 @@ transform_forecasts <- function(data,
                                 append = TRUE,
                                 label = "log",
                                 ...) {
-  original_data <- as.data.table(data)
+  original_data <- as_forecast(data)
   scale_col_present <- ("scale" %in% colnames(original_data))
 
   # Error handling
@@ -131,10 +131,9 @@
 
   if (append) {
     if (scale_col_present) {
-      transformed_data <-
-        data.table::copy(original_data)[scale == "natural"]
+      transformed_data <- copy(original_data)[scale == "natural"]
     } else {
-      transformed_data <- data.table::copy(original_data)
+      transformed_data <- copy(original_data)
       original_data[, scale := "natural"]
     }
     transformed_data[, predicted := fun(predicted, ...)]
2 changes: 1 addition & 1 deletion R/correlations.R
@@ -21,7 +21,7 @@
 correlation <- function(scores,
                         metrics = NULL,
                         digits = NULL) {
-  metrics <- get_metrics(scores)
+  metrics <- get_score_names(scores)
 
   # if quantile column is present, throw a warning
   if ("quantile" %in% names(scores)) {
16 changes: 8 additions & 8 deletions R/default-scoring-rules.R
@@ -130,17 +130,17 @@ rules_sample <- function(select = NULL, exclude = NULL) {
 #' - "underprediction" = [underprediction()]
 #' - "dispersion" = [dispersion()]
 #' - "bias" = [bias_quantile()]
-#' - "interval_coverage_50" = [interval_coverage_quantile()]
+#' - "interval_coverage_50" = [interval_coverage()]
 #' - "interval_coverage_90" = function(...) \{
-#'     run_safely(..., range = 90, fun = [interval_coverage_quantile])
+#'     run_safely(..., range = 90, fun = [interval_coverage])
 #'   \}
-#' - "interval_coverage_deviation" = [interval_coverage_dev_quantile()],
+#' - "interval_coverage_deviation" = [interval_coverage_deviation()],
 #' - "ae_median" = [ae_median_quantile()]
 #'
 #' Note: The `coverage_90` scoring rule is created as a wrapper around
-#' [interval_coverage_quantile()], making use of the function [run_safely()].
+#' [interval_coverage()], making use of the function [run_safely()].
 #' This construct allows the function to deal with arbitrary arguments in `...`,
-#' while making sure that only those that [interval_coverage_quantile()] can
+#' while making sure that only those that [interval_coverage()] can
 #' accept get passed on to it. `range = 90` is set in the function definition,
 #' as passing an argument `range = 90` to [score()] would mean it would also
 #' get passed to `coverage_50`.
@@ -157,11 +157,11 @@ rules_quantile <- function(select = NULL, exclude = NULL) {
     underprediction = underprediction,
     dispersion = dispersion,
     bias = bias_quantile,
-    interval_coverage_50 = interval_coverage_quantile,
+    interval_coverage_50 = interval_coverage,
     interval_coverage_90 = function(...) {
-      run_safely(..., range = 90, fun = interval_coverage_quantile)
+      run_safely(..., range = 90, fun = interval_coverage)
    },
-    interval_coverage_deviation = interval_coverage_dev_quantile,
+    interval_coverage_deviation = interval_coverage_deviation,
     ae_median = ae_median_quantile
   )
   selected <- select_rules(all, select, exclude)
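The `run_safely()` mechanism referenced in the roxygen text above is not itself shown in this diff. A minimal sketch of the idea under stated assumptions (hypothetical helper, not the package source): pass on only those arguments in `...` that `fun` can actually accept, so that stray arguments intended for other scoring rules are silently dropped.

```r
run_safely_sketch <- function(..., fun) {
  args <- list(...)
  # if fun does not itself take `...`, keep only the arguments it accepts
  if (!"..." %in% names(formals(fun))) {
    args <- args[names(args) %in% names(formals(fun))]
  }
  do.call(fun, args)
}

# e.g. `range = 90` is fixed in the wrapper, while unrelated arguments
# passed through score() are simply ignored by interval_coverage():
interval_coverage_90 <- function(...) {
  run_safely_sketch(..., range = 90, fun = interval_coverage)
}
```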
65 changes: 47 additions & 18 deletions R/get_-functions.R
@@ -125,22 +125,50 @@ get_type <- function(x) {
 }
 
 
-#' @title Get metrics that were used for scoring
-#' @description Internal helper function to get the metrics that were used
-#' to score forecasts.
-#' @param scores A data.table with an attribute `metric_names`
-#' @return Character vector with the metrics that were used for scoring.
-#' @keywords internal_input_check
-get_metrics <- function(scores) {
-  metric_names <- attr(scores, "metric_names")
-  if (is.null(metric_names)) {
-    stop("The data needs to have an attribute `metric_names` with the names ",
-         " of the metrics that were used for scoring. This should be the case ",
-         "if the data was produced using `score()`. Either run `score()` ",
-         "again, or set the attribute manually using ",
-         "`attr(data, 'metric_names') <- names_of_the_scoring_metrics")
+#' @title Get Names Of The Scoring Rules That Were Used For Scoring
+#' @description
+#' When applying a scoring rule (for example through [score()] or
+#' [add_coverage()]), the names of the scoring rules become column names of
+#' the resulting data.table. In addition, an attribute `score_names` will be
+#' added to the output, holding the names of the scores as a vector.
+#' This is done so that a function like [get_forecast_unit()] can still
+#' identify which columns are part of the forecast unit and which hold a score.
+#'
+#' `get_score_names()` accesses and returns this attribute. If there is no
+#' attribute, the function will return `NULL`. Users can control whether the
+#' function should error instead via the `error` argument.
+#'
+#' `get_score_names()` also checks whether the names of the scores stored in
+#' the attribute are column names of the data and will throw a warning if not.
+#' This can happen if you rename columns after scoring. You can either run
+#' [score()] again, specifying names for the scoring rules manually, or you
+#' can update the attribute manually using
+#' `attr(scores, "score_names") <- c("names", "of", "your", "scores")` (the
+#' order does not matter).
+#'
+#' @param scores A data.table with an attribute `score_names`
+#' @param error Throw an error if there is no attribute called `score_names`?
+#' Default is `FALSE`.
+#' @return Character vector with the names of the scoring rules that were used
+#' for scoring, or `NULL` if no scores were computed previously.
+#' @keywords check-forecasts
+#' @export
+get_score_names <- function(scores, error = FALSE) {
+  score_names <- attr(scores, "score_names")
+  if (error && is.null(score_names)) {
+    stop("Object needs an attribute `score_names` with the names of the ",
+         "scoring rules that were used for scoring. ",
+         "See `?get_score_names` for further information.")
+  }
+
+  if (!all(score_names %in% names(scores))) {
+    missing <- setdiff(score_names, names(scores))
+    warning("The following scores have been previously computed, but are no ",
+            "longer column names of the data: `", toString(missing), "`. ",
+            "See `?get_score_names` for further information.")
+  }
-  return(metric_names)
+
+  return(score_names)
 }
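A sketch of the intended usage of the new helper (the score names shown are illustrative, not output produced by this diff):

```r
scores <- score(as_forecast(example_quantile))
get_score_names(scores)
#> e.g. c("wis", "overprediction", "underprediction", "dispersion", "bias", ...)

# if score columns were renamed after scoring, update the attribute manually
# (order does not matter):
attr(scores, "score_names") <- c("wis", "bias")
```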


@@ -159,7 +187,7 @@ get_metrics <- function(scores) {
 get_forecast_unit <- function(data) {
   # check whether there is a conflict in the forecast_unit and if so warn
   protected_columns <- get_protected_columns(data)
-  protected_columns <- c(protected_columns, attr(data, "metric_names"))
+  protected_columns <- c(protected_columns, attr(data, "score_names"))
   forecast_unit <- setdiff(colnames(data), unique(protected_columns))
   return(forecast_unit)
 }
@@ -181,10 +209,11 @@ get_protected_columns <- function(data = NULL) {
 
   protected_columns <- c(
     "predicted", "observed", "sample_id", "quantile", "upper", "lower",
-    "pit_value", "range", "boundary", "relative_skill", "scaled_rel_skill",
+    "pit_value", "range", "boundary",
     "interval_coverage", "interval_coverage_deviation",
     "quantile_coverage", "quantile_coverage_deviation",
     available_metrics(),
+    grep("_relative_skill$", names(data), value = TRUE),
     grep("coverage_", names(data), fixed = TRUE, value = TRUE)
   )
 
@@ -246,7 +275,7 @@ get_scoringutils_attributes <- function(object) {
     "scoringutils_by",
     "forecast_unit",
     "forecast_type",
-    "metric_names",
+    "score_names",
     "messages",
     "warnings"
   )
10 changes: 5 additions & 5 deletions R/metrics-quantile.R
@@ -233,8 +233,8 @@ underprediction <- function(observed, predicted, quantile, ...) {
 #'   c(-2, 0, 3, 3, 4)
 #' )
 #' quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9)
-#' interval_coverage_quantile(observed, predicted, quantile)
-interval_coverage_quantile <- function(observed, predicted, quantile, range = 50) {
+#' interval_coverage(observed, predicted, quantile)
+interval_coverage <- function(observed, predicted, quantile, range = 50) {
   assert_input_quantile(observed, predicted, quantile)
   assert_number(range)
   necessary_quantiles <- c((100 - range) / 2, 100 - (100 - range) / 2) / 100
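The `necessary_quantiles` line maps an interval range to the two quantile levels that bound the central interval; a quick check of the arithmetic:

```r
range <- 50
c((100 - range) / 2, 100 - (100 - range) / 2) / 100
#> [1] 0.25 0.75   # central 50% interval

range <- 90
c((100 - range) / 2, 100 - (100 - range) / 2) / 100
#> [1] 0.05 0.95   # central 90% interval
```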
@@ -258,7 +258,7 @@
 #' @description Check the agreement between desired and actual interval coverage
 #' of a forecast.
 #'
-#' The function is similar to [interval_coverage_quantile()],
+#' The function is similar to [interval_coverage()],
 #' but takes all provided prediction intervals into account and
 #' compares nominal interval coverage (i.e. the desired interval coverage) with
 #' the actual observed interval coverage.
@@ -308,8 +308,8 @@ interval_coverage_quantile <- function(observed, predicted, quantile, range = 50
 #'   c(-2, 0, 3, 3, 4)
 #' )
 #' quantile <- c(0.1, 0.25, 0.5, 0.75, 0.9)
-#' interval_coverage_dev_quantile(observed, predicted, quantile)
-interval_coverage_dev_quantile <- function(observed, predicted, quantile) {
+#' interval_coverage_deviation(observed, predicted, quantile)
+interval_coverage_deviation <- function(observed, predicted, quantile) {
   assert_input_quantile(observed, predicted, quantile)
 
   # transform available quantiles into central interval ranges
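Interval coverage deviation, as also computed in `R/add_coverage.R` above (`interval_coverage - range / 100`), is the actual minus the nominal coverage. A worked example (the coverage values here are made up):

```r
# 48% of observations inside the 50% intervals, 93% inside the 90% intervals
observed_coverage <- c(0.48, 0.93)
nominal_coverage  <- c(0.50, 0.90)
observed_coverage - nominal_coverage
#> [1] -0.02  0.03
# interval_coverage_deviation() applies this comparison across all
# prediction intervals available in a forecast
```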
36 changes: 0 additions & 36 deletions R/metrics-sample.R
@@ -281,39 +281,3 @@ mad_sample <- function(observed = NULL, predicted, ...) {
   sharpness <- apply(predicted, MARGIN = 1, mad, ...)
   return(sharpness)
 }
-
-
-#' @title Interval Coverage
-#' @description To compute interval coverage for sample-based forecasts,
-#' predictive samples are converted first into predictive quantiles using the
-#' sample quantiles.
-#' @importFrom checkmate assert_number
-#' @rdname interval_coverage
-#' @export
-#' @examples
-#' observed <- rpois(30, lambda = 1:30)
-#' predicted <- replicate(200, rpois(n = 30, lambda = 1:30))
-#' interval_coverage_sample(observed, predicted)
-interval_coverage_sample <- function(observed, predicted, range = 50) {
-  assert_input_sample(observed, predicted)
-  assert_number(range)
-  necessary_quantiles <- c((100 - range) / 2, 100 - (100 - range) / 2) / 100
-
-  # this could be its own function, sample_to_quantile.numeric
-  # ==========================================================
-  n <- length(observed)
-  N <- length(predicted) / n
-  dt <- data.table(
-    observed = rep(observed, each = N),
-    predicted = as.vector(t(predicted))
-  )
-  quantile_dt <- sample_to_quantile(dt, necessary_quantiles)
-  # ==========================================================
-
-  # this could call interval_coverage_quantile instead
-  # ==========================================================
-  interval_dt <- quantile_to_interval(quantile_dt, format = "wide")
-  interval_dt[, interval_coverage := (observed >= lower) & (observed <= upper)]
-  # ==========================================================
-  return(interval_dt$interval_coverage)
-}
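With `interval_coverage_sample()` removed, the NEWS entry above points to converting samples to quantiles before scoring. A sketch of that workflow, reusing the data from the removed roxygen example (the conversion via `apply()`/`quantile()` is an assumption, not package code):

```r
library(scoringutils)

observed <- rpois(30, lambda = 1:30)
predicted <- replicate(200, rpois(n = 30, lambda = 1:30))

# quantile levels that bound the central 50% interval
quantile_levels <- c(0.25, 0.75)
predicted_quantiles <- t(apply(predicted, 1, quantile, probs = quantile_levels))

interval_coverage(observed, predicted_quantiles, quantile_levels, range = 50)
```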
16 changes: 12 additions & 4 deletions R/pairwise-comparisons.R
@@ -237,8 +237,7 @@ pairwise_comparison_one_group <- function(scores,
   # calculate relative skill as geometric mean
   # small theta is again better (assuming that the score is negatively oriented)
   result[, `:=`(
-    theta = geometric_mean(ratio),
-    rel_to_baseline = NA_real_
+    theta = geometric_mean(ratio)
   ),
   by = "model"
   ]
@@ -261,9 +260,18 @@
 
   # rename ratio to mean_scores_ratio
   data.table::setnames(out,
-    old = c("ratio", "theta", "rel_to_baseline"),
-    new = c("mean_scores_ratio", "relative_skill", "scaled_rel_skill")
+    old = c("ratio", "theta"),
+    new = c(
+      "mean_scores_ratio",
+      paste(metric, "relative_skill", sep = "_")
+    )
   )
+  if (!is.null(baseline)) {
+    data.table::setnames(out,
+      old = "rel_to_baseline",
+      new = paste(metric, "scaled_relative_skill", sep = "_")
+    )
+  }
 
   return(out[])
 }
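The effect of the renaming above, assuming for example that models were compared on the weighted interval score (`metric = "wis"`):

```r
metric <- "wis"
paste(metric, "relative_skill", sep = "_")
#> [1] "wis_relative_skill"
paste(metric, "scaled_relative_skill", sep = "_")
#> [1] "wis_scaled_relative_skill"
```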
