From 734df58ed9102f56917b15061144409993f1d4b5 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Wed, 14 Aug 2024 10:13:12 +0200 Subject: [PATCH 1/3] Use internal checking function to reduce duplication --- R/cfr_rolling.R | 29 +---------------------------- R/cfr_static.R | 30 +----------------------------- R/cfr_time_varying.R | 18 +----------------- R/check_input_data.R | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 74 deletions(-) create mode 100644 R/check_input_data.R diff --git a/R/cfr_rolling.R b/R/cfr_rolling.R index 9e22170b..56dc2605 100644 --- a/R/cfr_rolling.R +++ b/R/cfr_rolling.R @@ -63,32 +63,7 @@ cfr_rolling <- function(data, " the course of the outbreak." ) # input checking - checkmate::assert_data_frame( - data, - min.rows = 1, min.cols = 3 - ) - # check that input `` has columns date, cases, and deaths - checkmate::assert_names( - colnames(data), - must.include = c("date", "cases", "deaths") - ) - # check for any NAs among data - checkmate::assert_data_frame( - data[, c("date", "cases", "deaths")], - types = c("Date", "integerish"), - any.missing = FALSE - ) - # check that data$date is a date column - checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE) - # check for excessive missing date and throw an error - # also check delay_density - stopifnot( - "Input data must have sequential dates with none missing or duplicated" = - identical(unique(diff(data$date)), 1) # use numeric 1, not integer - # this solution works when df$date is `Date` - # this may need more thought for dates that are integers, POSIXct, - # or other units; consider the units package - ) + .check_input_data(data) checkmate::assert_count(poisson_threshold, positive = TRUE) # NOTE: delay_density is checked in estimate_outcomes() if passed and not NULL @@ -97,8 +72,6 @@ cfr_rolling <- function(data, cumulative_cases <- cumsum(data$cases) cumulative_deaths <- cumsum(data$deaths) - # Check cumulative sums for count type - checkmate::assert_integerish(cumulative_cases, lower = 0) # use assert_number to set upper limit at total_cases checkmate::assert_integerish( cumulative_deaths, diff --git a/R/cfr_static.R b/R/cfr_static.R index 4851a611..e48c4c35 100644 --- a/R/cfr_static.R +++ b/R/cfr_static.R @@ -124,33 +124,7 @@ cfr_static <- function(data, delay_density = NULL, poisson_threshold = 100) { - # input checking - checkmate::assert_data_frame( - data, - min.rows = 1, min.cols = 3 - ) - # check that input `` has columns date, cases, and deaths - checkmate::assert_names( - colnames(data), - must.include = c("date", "cases", "deaths") - ) - # check for any NAs among data - checkmate::assert_data_frame( - data[, c("date", "cases", "deaths")], - types = c("Date", "integerish"), - any.missing = FALSE - ) - # check that data$date is a date column - checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE) - - # check for excessive missing date and throw an error - stopifnot( - "Input data must have sequential dates with none missing or duplicated" = - identical(unique(diff(data$date)), 1) # use numeric 1, not integer - # this solution works when df$date is `Date` - # this may need more thought for dates that are integers, POSIXct, - # or other units; consider the units package - ) + .check_input_data(data) checkmate::assert_count(poisson_threshold, positive = TRUE) # NOTE: delay_density is checked in estimate_outcomes() if passed and not NULL @@ -160,8 +134,6 @@ cfr_static <- function(data, total_cases <- sum(data$cases, na.rm = TRUE) total_deaths <- sum(data$deaths, na.rm = TRUE) - # Add input checking for total cases and deaths - checkmate::assert_count(total_cases) # use assert_number to set upper limit at total_cases checkmate::assert_number(total_deaths, upper = total_cases, lower = 0) diff --git a/R/cfr_time_varying.R b/R/cfr_time_varying.R index e8a2aa57..7f136839 100644 --- a/R/cfr_time_varying.R +++ b/R/cfr_time_varying.R @@ -100,26 +100,10 @@ cfr_time_varying <- function(data, # input checking # zero count allowed to include all data checkmate::assert_count(burn_in) - - # expect rows more than burn in value - checkmate::assert_data_frame(data, min.cols = 3, min.rows = burn_in + 1) - # check that input `` has columns date, cases, and deaths - checkmate::assert_names( - colnames(data), - must.include = c("date", "cases", "deaths") - ) - # check for any NAs among data - checkmate::assert_data_frame( - data[, c("date", "cases", "deaths")], - any.missing = FALSE - ) - # check that data$date is a date column - checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE) checkmate::assert_count(smoothing_window, null.ok = TRUE) + .check_input_data(data) stopifnot( - "Input data must have sequential dates with none missing or duplicated" = - identical(unique(diff(data$date)), 1), # use numeric 1, not integer "`smoothing_window` must be an odd number greater than 0" = (smoothing_window %% 2 != 0), "`delay_density` must be a function with a single required argument, diff --git a/R/check_input_data.R b/R/check_input_data.R new file mode 100644 index 00000000..715dcc5e --- /dev/null +++ b/R/check_input_data.R @@ -0,0 +1,32 @@ +.check_input_data <- function(data) { + + checkmate::assert_data_frame( + data, + min.rows = 1, min.cols = 3 + ) + # check that input `` has columns date, cases, and deaths + checkmate::assert_names( + colnames(data), + must.include = c("date", "cases", "deaths") + ) + # check for any NAs among data + checkmate::assert_data_frame( + data[, c("date", "cases", "deaths")], + types = c("Date", "integerish"), + any.missing = FALSE, + ) + # check that data$date is a date column + checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE) + + # Check count types + checkmate::assert_integerish(data$cases, lower = 0) + checkmate::assert_integerish(data$deaths, lower = 0) + + stopifnot( + "Input data must have sequential dates with none missing or duplicated" = + identical(unique(diff(data$date)), 1) # use numeric 1, not integer + # this solution works when df$date is `Date` + # this may need more thought for dates that are integers, POSIXct, + # or other units; consider the units package + ) +} From 1f8f22034efad0ae247225da864e47e730cd7f11 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Wed, 14 Aug 2024 10:22:34 +0200 Subject: [PATCH 2/3] Collect all input issues in a collection --- R/check_input_data.R | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/R/check_input_data.R b/R/check_input_data.R index 715dcc5e..640633da 100644 --- a/R/check_input_data.R +++ b/R/check_input_data.R @@ -1,26 +1,35 @@ .check_input_data <- function(data) { - + coll <- checkmate::makeAssertCollection() + checkmate::assert_data_frame( data, - min.rows = 1, min.cols = 3 + min.rows = 1, min.cols = 3, + add = coll ) # check that input `` has columns date, cases, and deaths checkmate::assert_names( colnames(data), - must.include = c("date", "cases", "deaths") + must.include = c("date", "cases", "deaths"), + add = coll ) # check for any NAs among data checkmate::assert_data_frame( data[, c("date", "cases", "deaths")], types = c("Date", "integerish"), any.missing = FALSE, + add = coll ) # check that data$date is a date column - checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE) + checkmate::assert_date( + data$date, + any.missing = FALSE, + all.missing = FALSE, + add = coll + ) # Check count types - checkmate::assert_integerish(data$cases, lower = 0) - checkmate::assert_integerish(data$deaths, lower = 0) + checkmate::assert_integerish(data$cases, lower = 0, add = coll) + checkmate::assert_integerish(data$deaths, lower = 0, add = coll) stopifnot( "Input data must have sequential dates with none missing or duplicated" = @@ -29,4 +38,6 @@ # this may need more thought for dates that are integers, POSIXct, # or other units; consider the units package ) + + checkmate::reportAssertions(coll) } From 97d2ce68bb955db93419668d33ac1f495829de36 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Wed, 14 Aug 2024 10:44:25 +0200 Subject: [PATCH 3/3] Fix lints --- R/check_input_data.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/check_input_data.R b/R/check_input_data.R index 640633da..3ffc6d13 100644 --- a/R/check_input_data.R +++ b/R/check_input_data.R @@ -1,6 +1,6 @@ .check_input_data <- function(data) { coll <- checkmate::makeAssertCollection() - + checkmate::assert_data_frame( data, min.rows = 1, min.cols = 3, @@ -21,8 +21,8 @@ ) # check that data$date is a date column checkmate::assert_date( - data$date, - any.missing = FALSE, + data$date, + any.missing = FALSE, all.missing = FALSE, add = coll ) @@ -38,6 +38,6 @@ # this may need more thought for dates that are integers, POSIXct, # or other units; consider the units package ) - + checkmate::reportAssertions(coll) }