Skip to content

Commit

Permalink
Merge pull request #341 from epiforecasts/rework-lower-level-fcts
Browse files Browse the repository at this point in the history
Rework scoring functions and their interface
  • Loading branch information
nikosbosse authored Nov 1, 2023
2 parents b5360d1 + f91cec8 commit c8e5499
Show file tree
Hide file tree
Showing 107 changed files with 1,590 additions and 1,257 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Imports:
checkmate,
data.table,
ggdist (>= 3.2.0),
ggplot2 (>= 3.4.0),
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ export(summarise_scores)
export(summarize_scores)
export(theme_scoringutils)
export(transform_forecasts)
importFrom(checkmate,assert)
importFrom(checkmate,assert_data_frame)
importFrom(checkmate,assert_factor)
importFrom(checkmate,assert_numeric)
importFrom(checkmate,check_atomic_vector)
importFrom(checkmate,check_matrix)
importFrom(checkmate,check_numeric)
importFrom(data.table,"%like%")
importFrom(data.table,':=')
importFrom(data.table,.I)
Expand Down
19 changes: 17 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
# scoringutils 1.2.2
# scoringutils 1.3

This minor update addresses comments made by review from the Journal of Statistical Software (see preprint of the manuscript [here](https://arxiv.org/abs/2205.07090)).
This major update introduces a lot of breaking changes and addresses comments made by review from the Journal of Statistical Software (see preprint of the manuscript [here](https://arxiv.org/abs/2205.07090)).

## Package updates
- in `score()`, required columns "true_value" and "prediction" were replaced by required columns "observed" and "predicted". Scoring functions now also use the function arguements "observed" and "predicted" everywhere consistently.
- scoring functions received a consistent interface and input checks:
- metrics for binary forecasts:
- `observed`: factor with exactly 2 levels
- `predicted`: numeric, vector with probabilities
- metrics for point forecasts:
- `observed`: numeric vector
- `predicted`: numeric vector
- metrics for sample-based forecasts:
- `observed`: numeric, either a scalar or a vector
- `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector)
- metrics for quantile-based forecasts:
- `observed`: numeric, either a scalar or a vector
- `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector)
- `quantile`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`.
- changes to `avail_forecasts()` and `plot_avail_forecasts()`:
- the function `avail_forecasts()` was renamed to `available_forecasts()` for consistency with `available_metrics()`. The old function, `avail_forecasts()` is still available as an alias, but will be removed in the future.
- For clarity, the output column in `avail_forecasts()` was renamed from "Number forecasts" to "count".
Expand Down
2 changes: 1 addition & 1 deletion R/avail_forecasts.R → R/available_forecasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' categories over which the number of forecasts should be counted.
#' By default (`by = NULL`) this will be the unit of a single forecast (i.e.
#' all available columns (apart from a few "protected" columns such as
#' 'prediction' and 'true value') plus "quantile" or "sample" where present).
#' 'predicted' and 'observed') plus "quantile" or "sample_id" where present).
#'
#' @param collapse character vector (default is `c("quantile", "sample"`) with
#' names of categories for which the number of rows should be collapsed to one
Expand Down
130 changes: 64 additions & 66 deletions R/bias.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' }
#'
#' where \eqn{P_t} is the empirical cumulative distribution function of the
#' prediction for the true value \eqn{x_t}. Computationally, \eqn{P_t (x_t)} is
#' prediction for the observed value \eqn{x_t}. Computationally, \eqn{P_t (x_t)} is
#' just calculated as the fraction of predictive samples for \eqn{x_t}
#' that are smaller than \eqn{x_t}.
#'
Expand All @@ -29,20 +29,20 @@
#' -1 and 1 and is 0 ideally.
#'
#' @return vector of length n with the biases of the predictive samples with
#' respect to the true values.
#' respect to the observed values.
#' @inheritParams ae_median_sample
#' @author Nikos Bosse \email{nikosbosse@@gmail.com}
#' @examples
#'
#' ## integer valued forecasts
#' true_values <- rpois(30, lambda = 1:30)
#' predictions <- replicate(200, rpois(n = 30, lambda = 1:30))
#' bias_sample(true_values, predictions)
#' observed <- rpois(30, lambda = 1:30)
#' predicted <- replicate(200, rpois(n = 30, lambda = 1:30))
#' bias_sample(observed, predicted)
#'
#' ## continuous forecasts
#' true_values <- rnorm(30, mean = 1:30)
#' predictions <- replicate(200, rnorm(30, mean = 1:30))
#' bias_sample(true_values, predictions)
#' observed <- rnorm(30, mean = 1:30)
#' predicted <- replicate(200, rnorm(30, mean = 1:30))
#' bias_sample(observed, predicted)
#' @export
#' @references
#' The integer valued Bias function is discussed in
Expand All @@ -54,23 +54,21 @@
#' \doi{10.1371/journal.pcbi.1006785}
#' @keywords metric

bias_sample <- function(true_values, predictions) {
bias_sample <- function(observed, predicted) {

# check inputs
check_true_values(true_values)
check_predictions(predictions, true_values, class = "matrix")
prediction_type <- get_prediction_type(predictions)
check_input_sample(observed, predicted)
prediction_type <- get_prediction_type(predicted)

# empirical cdf
n_pred <- ncol(predictions)
p_x <- rowSums(predictions <= true_values) / n_pred
n_pred <- ncol(predicted)
p_x <- rowSums(predicted <= observed) / n_pred

if (prediction_type == "continuous") {
res <- 1 - 2 * p_x
return(res)
} else {
# for integer case also calculate empirical cdf for (y-1)
p_xm1 <- rowSums(predictions <= (true_values - 1)) / n_pred
p_xm1 <- rowSums(predicted <= (observed - 1)) / n_pred

res <- 1 - (p_x + p_xm1)
return(res)
Expand All @@ -95,104 +93,104 @@ bias_sample <- function(true_values, predictions) {
#'
#' where \eqn{Q_t} is the set of quantiles that form the predictive
#' distribution at time \eqn{t}. They represent our
#' belief about what the true value $x_t$ will be. For consistency, we define
#' belief about what the observed value $x_t$ will be. For consistency, we define
#' \eqn{Q_t} such that it always includes the element
#' \eqn{q_{t, 0} = - \infty} and \eqn{q_{t,1} = \infty}.
#' \eqn{1()} is the indicator function that is \eqn{1} if the
#' condition is satisfied and $0$ otherwise. In clearer terms, \eqn{B_t} is
#' defined as the maximum percentile rank for which the corresponding quantile
#' is still below the true value, if the true value is smaller than the
#' median of the predictive distribution. If the true value is above the
#' is still below the observed value, if the observed value is smaller than the
#' median of the predictive distribution. If the observed value is above the
#' median of the predictive distribution, then $B_t$ is the minimum percentile
#' rank for which the corresponding quantile is still larger than the true
#' value. If the true value is exactly the median, both terms cancel out and
#' value. If the observed value is exactly the median, both terms cancel out and
#' \eqn{B_t} is zero. For a large enough number of quantiles, the
#' percentile rank will equal the proportion of predictive samples below the
#' observed true value, and this metric coincides with the one for
#' observed value, and this metric coincides with the one for
#' continuous forecasts.
#'
#' Bias can assume values between
#' -1 and 1 and is 0 ideally (i.e. unbiased).
#' @param predictions vector of length corresponding to the number of quantiles
#' @param predicted vector of length corresponding to the number of quantiles
#' that holds predictions
#' @param quantiles vector of corresponding size with the quantiles for which
#' predictions were made. If this does not contain the median (0.5) then the
#' median is imputed as being the mean of the two innermost quantiles.
#' @param quantile vector of corresponding size with the quantile levels for
#' which predictions were made. If this does not contain the median (0.5) then
#' the median is imputed as being the mean of the two innermost quantiles.
#' @inheritParams bias_range
#' @return scalar with the quantile bias for a single quantile prediction
#' @author Nikos Bosse \email{nikosbosse@@gmail.com}
#' @examples
#'
#' predictions <- c(
#' predicted <- c(
#' 705.500, 1127.000, 4006.250, 4341.500, 4709.000, 4821.996,
#' 5340.500, 5451.000, 5703.500, 6087.014, 6329.500, 6341.000,
#' 6352.500, 6594.986, 6978.500, 7231.000, 7341.500, 7860.004,
#' 7973.000, 8340.500, 8675.750, 11555.000, 11976.500
#' )
#'
#' quantiles <- c(0.01, 0.025, seq(0.05, 0.95, 0.05), 0.975, 0.99)
#' quantile <- c(0.01, 0.025, seq(0.05, 0.95, 0.05), 0.975, 0.99)
#'
#' true_value <- 8062
#' observed <- 8062
#'
#' bias_quantile(predictions, quantiles, true_value = true_value)
#' bias_quantile(observed, predicted, quantile)
#' @export
#' @keywords metric

bias_quantile <- function(predictions, quantiles, true_value) {
# check that predictions and quantiles have the same length
if (!length(predictions) == length(quantiles)) {
stop("predictions and quantiles must have the same length")
bias_quantile <- function(observed, predicted, quantile) {
# check that predictions and quantile have the same length
if (!length(predicted) == length(quantile)) {
stop("`predicted` and `quantile` must have the same length")
}

if (anyNA(predictions)) {
quantiles <- quantiles[!is.na(predictions)]
predictions <- predictions[!is.na(predictions)]
if (anyNA(predicted)) {
quantile <- quantile[!is.na(predicted)]
predicted <- predicted[!is.na(predicted)]
}

if (anyNA(quantiles)) {
quantiles <- quantiles[!is.na(quantiles)]
predictions <- predictions[!is.na(quantiles)]
if (anyNA(quantile)) {
quantile <- quantile[!is.na(quantile)]
predicted <- predicted[!is.na(quantile)]
}

# if there is no input, return NA
if (length(quantiles) == 0 || length(predictions) == 0) {
if (length(quantile) == 0 || length(predicted) == 0) {
return(NA_real_)
}

check_quantiles(quantiles)
check_quantiles(quantile)

if (!all(diff(predictions) >= 0)) {
if (!all(diff(predicted) >= 0)) {
stop("predictions must be increasing with quantiles")
}

if (0.5 %in% quantiles) {
median_prediction <- predictions[quantiles == 0.5]
if (0.5 %in% quantile) {
median_prediction <- predicted[quantile == 0.5]
} else {
# if median is not available, compute as mean of two innermost quantiles
message(
"Median not available, computing as mean of two innermost quantiles",
" in order to compute bias."
)
median_prediction <-
0.5 * predictions[quantiles == max(quantiles[quantiles < 0.5])] +
0.5 * predictions[quantiles == min(quantiles[quantiles > 0.5])]
0.5 * predicted[quantile == max(quantile[quantile < 0.5])] +
0.5 * predicted[quantile == min(quantile[quantile > 0.5])]
}

if (true_value == median_prediction) {
if (observed == median_prediction) {
bias <- 0
return(bias)
} else if (true_value < median_prediction) {
if (true_value < min(predictions)) {
} else if (observed < median_prediction) {
if (observed < min(predicted)) {
bias <- 1
} else {
q <- max(quantiles[predictions <= true_value])
q <- max(quantile[predicted <= observed])
bias <- 1 - 2 * q
}
} else if (true_value > median_prediction) {
if (true_value > max(predictions)) {
} else if (observed > median_prediction) {
if (observed > max(predicted)) {
bias <- -1
} else {
q <- min(quantiles[predictions >= true_value])
q <- min(quantile[predicted >= observed])
bias <- 1 - 2 * q
}
}
Expand Down Expand Up @@ -224,21 +222,21 @@ bias_quantile <- function(predictions, quantiles, true_value) {
#'
#' where \eqn{Q_t} is the set of quantiles that form the predictive
#' distribution at time \eqn{t}. They represent our
#' belief about what the true value \eqn{x_t} will be. For consistency, we
#' define
#' belief about what the later observed value \eqn{x_t} will be. For
#' consistency, we define
#' \eqn{Q_t} such that it always includes the element
#' \eqn{q_{t, 0} = - \infty} and \eqn{q_{t,1} = \infty}.
#' \eqn{\mathbf{1}()}{1()} is the indicator function that is \eqn{1} if the
#' condition is satisfied and $0$ otherwise. In clearer terms, \eqn{B_t} is
#' defined as the maximum percentile rank for which the corresponding quantile
#' is still below the true value, if the true value is smaller than the
#' median of the predictive distribution. If the true value is above the
#' is still below the observed value, if the observed value is smaller than the
#' median of the predictive distribution. If the observed value is above the
#' median of the predictive distribution, then $B_t$ is the minimum percentile
#' rank for which the corresponding quantile is still larger than the true
#' value. If the true value is exactly the median, both terms cancel out and
#' value. If the observed value is exactly the median, both terms cancel out and
#' \eqn{B_t} is zero. For a large enough number of quantiles, the
#' percentile rank will equal the proportion of predictive samples below the
#' observed true value, and this metric coincides with the one for
#' observed value, and this metric coincides with the one for
#' continuous forecasts.
#'
#' Bias can assume values between
Expand All @@ -251,7 +249,7 @@ bias_quantile <- function(predictions, quantiles, true_value) {
#' prediction interval
#' @param range vector of corresponding size with information about the width
#' of the central prediction interval
#' @param true_value a single true value
#' @param observed a single observed value
#' @return scalar with the quantile bias for a single quantile prediction
#' @seealso bias_quantile bias_sample
#' @author Nikos Bosse \email{nikosbosse@@gmail.com}
Expand All @@ -271,16 +269,16 @@ bias_quantile <- function(predictions, quantiles, true_value) {
#'
#' range <- c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98)
#'
#' true_value <- 8062
#' observed <- 8062
#'
#' bias_range(
#' lower = lower, upper = upper,
#' range = range, true_value = true_value
#' range = range, observed = observed
#' )
#' @export
#' @keywords metric

bias_range <- function(lower, upper, range, true_value) {
bias_range <- function(lower, upper, range, observed) {

if (anyNA(range)) {
if (is.na(range[1]) && !any(range[-1] == 0)) {
Expand All @@ -302,17 +300,17 @@ bias_range <- function(lower, upper, range, true_value) {
check_quantiles(range, name = "range", range = c(0, 100))

# Convert range to quantiles
quantiles <- c(
quantile <- c(
rev(abs(100 - range) / (2 * 100)),
abs(100 + range[range != 0]) / (2 * 100)
)

# Combine predictions
upper_without_median <- upper[range != 0]
predictions <- c(rev(lower), upper_without_median)
predicted <- c(rev(lower), upper_without_median)

# Call bias_quantile
bias <- bias_quantile(predictions, quantiles, true_value)
bias <- bias_quantile(observed, predicted, quantile)

return(bias)
}
Loading

0 comments on commit c8e5499

Please sign in to comment.