From 32239bf8220db7edad5584afcc4a55255dc0cd9d Mon Sep 17 00:00:00 2001
From: nikosbosse <nikosbosse@gmail.com>
Date: Thu, 9 Nov 2023 15:42:59 +0100
Subject: [PATCH] Update documentation for `wis()` and its components

---
 R/metrics-quantile.R    | 92 +++++++++++++++++++++++++++++++++++++-
 man/metrics_quantile.Rd |  2 +-
 man/wis.Rd              | 98 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 188 insertions(+), 4 deletions(-)

diff --git a/R/metrics-quantile.R b/R/metrics-quantile.R
index e7e6fbfed..f3598690e 100644
--- a/R/metrics-quantile.R
+++ b/R/metrics-quantile.R
@@ -2,13 +2,92 @@
 # Metrics with a many-to-one relationship between input and score
 ################################################################################
 
-#' Weighted Interval Score
+#' Weighted Interval Score (WIS)
+#' @description
+#' The WIS is a proper scoring rule used to evaluate forecasts in an interval- /
+#' quantile-based format. See Bracher et al. (2021). Smaller values are better.
+#'
+#' As the name suggest the score assumes that a forecast comes in the form of
+#' one or multiple central prediction intervals. A prediction interval is
+#' characterised by a lower and an upper bound formed by a pair of predictive
+#' quantiles. For example, a 50% central prediction interval is formed by the
+#' 0.25 and 0.75 quantiles of the predictive distribution.
+#'
+#' **Interval score**
+#'
+#' The interval score (IS) is the sum of three components:
+#' overprediction, underprediction and dispersion. For a single prediction
+#' interval only one of the components is non-zero. If for a single prediction
+#' interval the observed value is below the lower bound, then the interval
+#' score is equal to the absolute difference between the lower bound and the
+#' observed value ("underprediction"). "Overprediction" is defined analogously.
+#' If the observed value falls within the bounds of the prediction interval,
+#' then the interval score is equal to the width of the prediction interval,
+#' i.e. the difference between the upper and lower bound. For a single interval,
+#' we therefore have:
+#'
+#' \deqn{
+#' \textrm{IS} = (\textrm{upper} - \textrm{lower}) + \frac{2}{\alpha}(\textrm{lower}
+#'  - \textrm{observed}) *
+#' \mathbf{1}(\textrm{observed} < \textrm{lower}) +
+#' \frac{2}{\alpha}(\textrm{observed} - \textrm{upper}) *
+#' \mathbf{1}(\textrm{observed} > \textrm{upper})
+#' }{
+#' score = (upper - lower) + 2/alpha * (lower - observed) *
+#' 1(observed < lower) + 2/alpha * (observed - upper) *
+#' 1(observed > upper)
+#' }
+#' where \eqn{\mathbf{1}()}{1()} is the indicator function and
+#' indicates how much is outside the prediction interval.
+#' \eqn{\alpha}{alpha} is the decimal value that indicates how much is outside
+#' the prediction interval. For a 90% prediction interval, for example,
+#' \eqn{\alpha}{alpha} is equal to 0.1. No specific distribution is assumed,
+#' but the range has to be symmetric (i.e you can't use the 0.1 quantile
+#' as the lower bound and the 0.7 quantile as the upper).
+#' Non-symmetric quantiles can be scored using the function [quantile_score()].
+#'
+#' Usually the interval score is weighted by a factor that makes sure that the
+#' average score across an increasing number of equally spaced
+#' quantiles, converges to the continuous ranked probability score (CRPS). This
+#' weighted score is called the weihted interval score (WIS).
+#' The weight commonly used is \eqn{\alpha / 2}{alpha / 2}.
+#'
+#' **Quantile score**
+#'
+#' In addition to the interval score, there also exists a quantile score (QS)
+#' (see [quantile_score()]), which is equal to the so-called pinball loss.
+#' The quantile score can be computed for a single quantile (whereas the
+#' interval score requires two quantiles that form an interval). However,
+#' the intuitive decomposition into overprediction, underprediction and
+#' dispersion does not exist for the quantile score.
+#'
+#' **Two versions of the weighted interval score**
+#'
+#' There are two ways to conceptualise the weighted interval score across
+#' several quantiles / prediction intervals and the median.
+#'
+#' In one view, you would treat the WIS as the average of quantile scores (and
+#' the median as 0.5-quantile) (this is the default for `wis()`). In another
+#' view, you would treat the WIS as the average of several interval scores +
+#' the difference between observed value and median forecast. The effect of
+#' that is that in contrast to the first view, the median has twice as much
+#' weight (because it is weighted like a prediction interval, rather than like
+#' a single quantile). Both are valid ways to conceptualise the WIS and you
+#' can control the behvaviour with the `count_median_twice`-argument.
+#'
+#' **WIS components**:
+#' WIS components can be computed individually using the functions
+#' `overprediction`, `underprediction`, and `dispersion.`
+#'
 #' @inheritParams interval_score
 #' @param predicted vector of size n with the predicted values
 #' @param quantile vector with quantile levels of size N
 #' @param count_median_twice if TRUE, count the median twice in the score
 #' @param na.rm if TRUE, ignore NA values when computing the score
 #' @importFrom stats weighted.mean
+#' @return
+#' `wis()`: a numeric vector with WIS values (one per observation), or a list
+#' with separate entries if `separate_results` is `TRUE`.
 #' @export
 wis <- function(observed,
                 predicted,
@@ -62,6 +141,9 @@ wis <- function(observed,
   }
 }
 
+
+#' @return
+#' `dispersion()`: a numeric vector with dispersion values (one per observation)
 #' @export
 #' @rdname wis
 dispersion <- function(observed, predicted, quantile) {
@@ -69,6 +151,10 @@ dispersion <- function(observed, predicted, quantile) {
   wis(observed, predicted, quantile, separate_results = TRUE)$dispersion
 }
 
+
+#' @return
+#' `overprediction()`: a numeric vector with overprediction values (one per
+#' observation)
 #' @export
 #' @rdname wis
 overprediction <- function(observed, predicted, quantile) {
@@ -76,6 +162,10 @@ overprediction <- function(observed, predicted, quantile) {
   wis(observed, predicted, quantile, separate_results = TRUE)$overprediction
 }
 
+
+#' @return
+#' `underprediction()`: a numeric vector with underprediction values (one per
+#' observation)
 #' @export
 #' @rdname wis
 underprediction <- function(observed, predicted, quantile) {
diff --git a/man/metrics_quantile.Rd b/man/metrics_quantile.Rd
index e96dcc836..9fda39f03 100644
--- a/man/metrics_quantile.Rd
+++ b/man/metrics_quantile.Rd
@@ -5,7 +5,7 @@
 \alias{metrics_quantile}
 \title{Default metrics for quantile-based forecasts.}
 \format{
-An object of class \code{list} of length 4.
+An object of class \code{list} of length 7.
 }
 \usage{
 metrics_quantile
diff --git a/man/wis.Rd b/man/wis.Rd
index fbf8bc6f7..a76b3c35f 100644
--- a/man/wis.Rd
+++ b/man/wis.Rd
@@ -2,7 +2,10 @@
 % Please edit documentation in R/metrics-quantile.R
 \name{wis}
 \alias{wis}
-\title{Weighted Interval Score}
+\alias{dispersion}
+\alias{overprediction}
+\alias{underprediction}
+\title{Weighted Interval Score (WIS)}
 \usage{
 wis(
   observed,
@@ -13,6 +16,12 @@ wis(
   count_median_twice = FALSE,
   na.rm = TRUE
 )
+
+dispersion(observed, predicted, quantile)
+
+overprediction(observed, predicted, quantile)
+
+underprediction(observed, predicted, quantile)
 }
 \arguments{
 \item{observed}{A vector with observed values of size n}
@@ -36,6 +45,91 @@ Default: \code{TRUE}.}
 
 \item{na.rm}{if TRUE, ignore NA values when computing the score}
 }
+\value{
+\code{wis()}: a numeric vector with WIS values (one per observation), or a list
+with separate entries if \code{separate_results} is \code{TRUE}.
+
+\code{dispersion()}: a numeric vector with dispersion values (one per observation)
+
+\code{overprediction()}: a numeric vector with overprediction values (one per
+observation)
+
+\code{underprediction()}: a numeric vector with underprediction values (one per
+observation)
+}
 \description{
-Weighted Interval Score
+The WIS is a proper scoring rule used to evaluate forecasts in an interval- /
+quantile-based format. See Bracher et al. (2021). Smaller values are better.
+
+As the name suggest the score assumes that a forecast comes in the form of
+one or multiple central prediction intervals. A prediction interval is
+characterised by a lower and an upper bound formed by a pair of predictive
+quantiles. For example, a 50\% central prediction interval is formed by the
+0.25 and 0.75 quantiles of the predictive distribution.
+
+\strong{Interval score}
+
+The interval score (IS) is the sum of three components:
+overprediction, underprediction and dispersion. For a single prediction
+interval only one of the components is non-zero. If for a single prediction
+interval the observed value is below the lower bound, then the interval
+score is equal to the absolute difference between the lower bound and the
+observed value ("underprediction"). "Overprediction" is defined analogously.
+If the observed value falls within the bounds of the prediction interval,
+then the interval score is equal to the width of the prediction interval,
+i.e. the difference between the upper and lower bound. For a single interval,
+we therefore have:
+
+\deqn{
+\textrm{IS} = (\textrm{upper} - \textrm{lower}) + \frac{2}{\alpha}(\textrm{lower}
+ - \textrm{observed}) *
+\mathbf{1}(\textrm{observed} < \textrm{lower}) +
+\frac{2}{\alpha}(\textrm{observed} - \textrm{upper}) *
+\mathbf{1}(\textrm{observed} > \textrm{upper})
+}{
+score = (upper - lower) + 2/alpha * (lower - observed) *
+1(observed < lower) + 2/alpha * (observed - upper) *
+1(observed > upper)
+}
+where \eqn{\mathbf{1}()}{1()} is the indicator function and
+indicates how much is outside the prediction interval.
+\eqn{\alpha}{alpha} is the decimal value that indicates how much is outside
+the prediction interval. For a 90\% prediction interval, for example,
+\eqn{\alpha}{alpha} is equal to 0.1. No specific distribution is assumed,
+but the range has to be symmetric (i.e you can't use the 0.1 quantile
+as the lower bound and the 0.7 quantile as the upper).
+Non-symmetric quantiles can be scored using the function \code{\link[=quantile_score]{quantile_score()}}.
+
+Usually the interval score is weighted by a factor that makes sure that the
+average score across an increasing number of equally spaced
+quantiles, converges to the continuous ranked probability score (CRPS). This
+weighted score is called the weihted interval score (WIS).
+The weight commonly used is \eqn{\alpha / 2}{alpha / 2}.
+
+\strong{Quantile score}
+
+In addition to the interval score, there also exists a quantile score (QS)
+(see \code{\link[=quantile_score]{quantile_score()}}), which is equal to the so-called pinball loss.
+The quantile score can be computed for a single quantile (whereas the
+interval score requires two quantiles that form an interval). However,
+the intuitive decomposition into overprediction, underprediction and
+dispersion does not exist for the quantile score.
+
+\strong{Two versions of the weighted interval score}
+
+There are two ways to conceptualise the weighted interval score across
+several quantiles / prediction intervals and the median.
+
+In one view, you would treat the WIS as the average of quantile scores (and
+the median as 0.5-quantile) (this is the default for \code{wis()}). In another
+view, you would treat the WIS as the average of several interval scores +
+the difference between observed value and median forecast. The effect of
+that is that in contrast to the first view, the median has twice as much
+weight (because it is weighted like a prediction interval, rather than like
+a single quantile). Both are valid ways to conceptualise the WIS and you
+can control the behvaviour with the \code{count_median_twice}-argument.
+
+\strong{WIS components}:
+WIS components can be computed individually using the functions
+\code{overprediction}, \code{underprediction}, and \code{dispersion.}
 }