From 75a6ec819f30f22681fb997ddcfd3b4be19b8d89 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 16 Feb 2024 09:49:50 -0800 Subject: [PATCH 1/4] feat: make set_cache more verbose --- R/cache.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/cache.R b/R/cache.R index cd94c92b..51337abc 100644 --- a/R/cache.R +++ b/R/cache.R @@ -154,7 +154,6 @@ set_cache <- function(cache_dir = NULL, } } - if (!cache_usable) { print(glue::glue( "The directory at {cache_dir} is not accessible; check permissions and/or use a different ", @@ -168,6 +167,13 @@ set_cache <- function(cache_dir = NULL, logfile = file.path(cache_dir, logfile) ) } + + cli::cli_inform(c( + "!" = "epidatr cache is being used (set env var EPIDATR_USE_CACHE=FALSE if not intended).", + "i" = "The cache directory is {cache_dir}.", + "i" = "The cache will be cleared after {days} days and will be pruned if it exceeds {max_size} MB.", + "i" = "The log of cache transactions is stored at {file.path(cache_dir, logfile)}." + )) } #' Manually reset the cache, deleting all currently saved data and starting afresh From 78e97ed4695731aa34fbe7858a57253489f3b6a3 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Sat, 24 Feb 2024 09:59:31 -0800 Subject: [PATCH 2/4] refactor+doc: update default timeout_seconds to 15 minutes. Also remove some timeout_seconds-related defaults from a few internal request functions, since they almost always receive a default value from upstream, to reduce the number of places the default value is hardcoded. Fix all calls to those functions to include arguments. --- .github/pull_request_template.md | 11 ++++++----- DESCRIPTION | 2 +- NEWS.md | 1 + R/epidatacall.R | 10 +++++----- R/request.R | 2 +- man/do_request.Rd | 2 +- man/fetch_args_list.Rd | 2 +- man/request_impl.Rd | 2 +- tests/testthat/generate_test_data.R | 6 ++++-- tests/testthat/test-epidatacall.R | 14 +++++++++++--- tests/testthat/test-request.R | 2 +- 11 files changed, 33 insertions(+), 21 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 9a0edceb..14a572e7 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,13 +2,14 @@ Please: -- [ ] Make sure this PR is against "dev", not "main". +- [ ] Make sure this PR is against "dev", not "main" (unless this is a release + PR). - [ ] Request a review from one of the current epiprocess main reviewers: brookslogan, nmdefries. -- [ ] Makes sure to bump the version number in `DESCRIPTION` and `NEWS.md`. - Always increment the patch version number (the third number), unless you are - making a release PR from dev to main, in which case increment the minor - version number (the second number). +- [ ] Makes sure to bump the version number in `DESCRIPTION`. Always increment + the patch version number (the third number), unless you are making a + release PR from dev to main, in which case increment the minor version + number (the second number). - [ ] Describe changes made in NEWS.md, making sure breaking changes (backwards-incompatible changes to the documented interface) are noted. Collect the changes under the next release number (e.g. if you are on diff --git a/DESCRIPTION b/DESCRIPTION index febe8e4a..61d97ec7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: epidatr Type: Package Title: Client for Delphi's 'Epidata' API -Version: 1.0.1 +Version: 1.0.2 Date: 2023-12-07 Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 41c461ec..41ef133d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ - `pvt_twitter` and `pub_wiki` now use `time_type` and `time_values` args instead of mutually exclusive `dates` and `epiweeks` (#236). This matches the interface of the `pub_covidcast` endpoint. - All endpoints now support the use of "\*" as a wildcard to fetch all dates or epiweeks (#234). - Fixed bug with NAs when parsing ints (#243). +- Updated the default `timeout_seconds` to 15 minutes to allow large queries by default. # epidatr 1.0.0 diff --git a/R/epidatacall.R b/R/epidatacall.R index cde20c9b..958e00aa 100644 --- a/R/epidatacall.R +++ b/R/epidatacall.R @@ -89,7 +89,7 @@ create_epidata_call <- function(endpoint, params, meta = NULL, } #' @importFrom checkmate test_class test_list -request_arguments <- function(epidata_call, format_type, fields = NULL) { +request_arguments <- function(epidata_call, format_type, fields) { stopifnot(inherits(epidata_call, "epidata_call")) stopifnot(format_type %in% c("json", "csv", "classic")) stopifnot(is.null(fields) || is.character(fields)) @@ -164,7 +164,7 @@ fetch_args_list <- function( disable_date_parsing = FALSE, disable_data_frame_parsing = FALSE, return_empty = FALSE, - timeout_seconds = 30, + timeout_seconds = 15 * 60, base_url = NULL, dry_run = FALSE, debug = FALSE, @@ -288,7 +288,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) { stopifnot(inherits(epidata_call, "epidata_call")) stopifnot(inherits(fetch_args, "fetch_args")) - response_content <- request_impl(epidata_call, "classic", fetch_args$fields, fetch_args$timeout_seconds) %>% + response_content <- request_impl(epidata_call, "classic", fetch_args$timeout_seconds, fetch_args$fields) %>% httr::content(as = "text", encoding = "UTF-8") %>% jsonlite::fromJSON(simplifyDataFrame = !fetch_args$disable_data_frame_parsing) @@ -318,7 +318,7 @@ fetch_debug <- function(epidata_call, fetch_args = fetch_args_list()) { stopifnot(inherits(epidata_call, "epidata_call")) stopifnot(inherits(fetch_args, "fetch_args")) - response <- request_impl(epidata_call, fetch_args$format_type, fetch_args$fields, fetch_args$timeout_seconds) + response <- request_impl(epidata_call, fetch_args$format_type, fetch_args$timeout_seconds, fetch_args$fields) content <- httr::content(response, "text", encoding = "UTF-8") content } @@ -366,7 +366,7 @@ with_base_url <- function(epidata_call, base_url) { #' @importFrom httr stop_for_status content http_type #' @importFrom xml2 read_html xml_find_all xml_text #' @keywords internal -request_impl <- function(epidata_call, format_type, fields = NULL, timeout_seconds = 30) { +request_impl <- function(epidata_call, format_type, timeout_seconds, fields) { stopifnot(inherits(epidata_call, "epidata_call")) stopifnot(format_type %in% c("json", "csv", "classic")) diff --git a/R/request.R b/R/request.R index 1e3be1b4..603e7bd3 100644 --- a/R/request.R +++ b/R/request.R @@ -22,7 +22,7 @@ join_url <- function(url, endpoint) { #' #' @importFrom httr RETRY #' @keywords internal -do_request <- function(url, params, timeout_seconds = 30) { +do_request <- function(url, params, timeout_seconds) { # don't retry in case of certain status codes key <- get_api_key() if (key != "") { diff --git a/man/do_request.Rd b/man/do_request.Rd index 7add7b85..e5cd7995 100644 --- a/man/do_request.Rd +++ b/man/do_request.Rd @@ -4,7 +4,7 @@ \alias{do_request} \title{performs the request} \usage{ -do_request(url, params, timeout_seconds = 30) +do_request(url, params, timeout_seconds) } \description{ You can test the authentication headers like so: diff --git a/man/fetch_args_list.Rd b/man/fetch_args_list.Rd index 9f7a2be4..ca6f7dcf 100644 --- a/man/fetch_args_list.Rd +++ b/man/fetch_args_list.Rd @@ -11,7 +11,7 @@ fetch_args_list( disable_date_parsing = FALSE, disable_data_frame_parsing = FALSE, return_empty = FALSE, - timeout_seconds = 30, + timeout_seconds = 15 * 60, base_url = NULL, dry_run = FALSE, debug = FALSE, diff --git a/man/request_impl.Rd b/man/request_impl.Rd index 476c7ee2..d6ad7669 100644 --- a/man/request_impl.Rd +++ b/man/request_impl.Rd @@ -5,7 +5,7 @@ \title{Makes a request to the API and returns the response, catching HTTP errors and forwarding the HTTP body in R errors} \usage{ -request_impl(epidata_call, format_type, fields = NULL, timeout_seconds = 30) +request_impl(epidata_call, format_type, timeout_seconds, fields) } \description{ Makes a request to the API and returns the response, catching diff --git a/tests/testthat/generate_test_data.R b/tests/testthat/generate_test_data.R index 1aac8393..834de416 100644 --- a/tests/testthat/generate_test_data.R +++ b/tests/testthat/generate_test_data.R @@ -4,7 +4,8 @@ epidata_call %>% url <- full_url(epidata_call) params <- request_arguments(epidata_call, "csv", NULL) -result <- do_request(url, params) %>% readr::write_rds(testthat::test_path("data/test-http401.rds")) +result <- do_request(url, params, timeout_seconds = 10 * 60) %>% + readr::write_rds(testthat::test_path("data/test-http401.rds")) epidata_call <- pvt_afhsb( auth = Sys.getenv("SECRET_API_AUTH_AFHSB"), @@ -14,7 +15,8 @@ epidata_call <- pvt_afhsb( ) url <- full_url(epidata_call) params <- request_arguments(epidata_call, "csv", NULL) -response <- do_request(url, params) %>% readr::write_rds(testthat::test_path("data/test-http500.rds")) +response <- do_request(url, params, timeout_seconds = 10 * 60) %>% + readr::write_rds(testthat::test_path("data/test-http500.rds")) epidata_call %>% fetch_debug(format_type = "classic") %>% diff --git a/tests/testthat/test-epidatacall.R b/tests/testthat/test-epidatacall.R index 642c609d..028ba1f4 100644 --- a/tests/testthat/test-epidatacall.R +++ b/tests/testthat/test-epidatacall.R @@ -10,7 +10,11 @@ test_that("request_impl http errors", { # see generate_test_data.R do_request = function(...) readRDS(testthat::test_path("data/test-http401.rds")), ) - expect_error(response <- epidata_call %>% request_impl("csv"), class = "http_401") + expect_error( + response <- epidata_call %>% + request_impl("csv", timeout_seconds = 30, fields = NULL), + class = "http_401" + ) # should give a 500 error (the afhsb endpoint is removed) @@ -18,7 +22,11 @@ test_that("request_impl http errors", { local_mocked_bindings( do_request = function(...) readRDS(testthat::test_path("data/test-http500.rds")) ) - expect_error(response <- epidata_call %>% request_impl("csv"), class = "http_500") + expect_error( + response <- epidata_call %>% + request_impl("csv", timeout_seconds = 30, fields = NULL), + class = "http_500" + ) }) test_that("fetch_args", { @@ -30,7 +38,7 @@ test_that("fetch_args", { disable_date_parsing = FALSE, disable_data_frame_parsing = FALSE, return_empty = FALSE, - timeout_seconds = 30, + timeout_seconds = 15 * 60, base_url = NULL, dry_run = FALSE, debug = FALSE, diff --git a/tests/testthat/test-request.R b/tests/testthat/test-request.R index da09f858..6a3d6536 100644 --- a/tests/testthat/test-request.R +++ b/tests/testthat/test-request.R @@ -1,4 +1,4 @@ test_that("requesting works", { - res <- do_request("https://httpbin.org/status/414", list()) + res <- do_request("https://httpbin.org/status/414", list(), timeout_seconds = 10 * 60) expect_equal(res$status_code, 414) }) From 1917e27c6d567967fac21892c193ae0c2acfa0ff Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Sat, 24 Feb 2024 10:05:53 -0800 Subject: [PATCH 3/4] ci: fix missing devtools dependency --- .github/workflows/document.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/document.yaml b/.github/workflows/document.yaml index 0608a2f2..9926fff3 100644 --- a/.github/workflows/document.yaml +++ b/.github/workflows/document.yaml @@ -26,8 +26,12 @@ jobs: - name: Install dependencies uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::roxygen2 - needs: roxygen2 + extra-packages: | + any::devtools + any::roxygen2 + needs: | + devtools + roxygen2 - name: Document run: roxygen2::roxygenise() From 4ef7429cb8c3692ffb7c08bb7a2e9a07596d9c61 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 15 Feb 2024 17:37:40 -0800 Subject: [PATCH 4/4] feat: make default cache directory R version portable with rappdirs::user_cache_dir() --- DESCRIPTION | 1 + R/cache.R | 10 +++++----- man/clear_cache.Rd | 6 +++--- man/set_cache.Rd | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 61d97ec7..6d678434 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,6 +35,7 @@ Imports: MMWRweek, purrr, openssl, + rappdirs, readr, tibble, usethis, diff --git a/R/cache.R b/R/cache.R index 51337abc..75c9473f 100644 --- a/R/cache.R +++ b/R/cache.R @@ -76,9 +76,9 @@ cache_environ$epidatr_cache <- NULL #' ) #' #' @param cache_dir the directory in which the cache is stored. By default, this -#' is `tools::R_user_dir()` if on R 4.0+, but must be specified for earlier -#' versions of R. The path can be either relative or absolute. The -#' environmental variable is `EPIDATR_CACHE_DIR`. +#' is `rappdirs::user_cache_dir("R", version = "epidatr")`. The path can be +#' either relative or absolute. The environmental variable is +#' `EPIDATR_CACHE_DIR`. #' @param days the maximum length of time in days to keep any particular cached #' call. By default this is `1`. The environmental variable is #' `EPIDATR_CACHE_MAX_AGE_DAYS`. @@ -103,8 +103,8 @@ set_cache <- function(cache_dir = NULL, max_size = NULL, logfile = NULL, confirm = TRUE) { - if (is.null(cache_dir) && sessionInfo()$R.version$major >= 4) { - cache_dir <- Sys.getenv("EPIDATR_CACHE_DIR", unset = tools::R_user_dir("epidatr")) + if (is.null(cache_dir)) { + cache_dir <- Sys.getenv("EPIDATR_CACHE_DIR", unset = rappdirs::user_cache_dir("R", version = "epidatr")) } else if (is.null(cache_dir)) { # earlier version, so no tools cache_dir <- Sys.getenv("EPIDATR_CACHE_DIR") diff --git a/man/clear_cache.Rd b/man/clear_cache.Rd index f53148ad..e84740f8 100644 --- a/man/clear_cache.Rd +++ b/man/clear_cache.Rd @@ -11,9 +11,9 @@ clear_cache(..., disable = FALSE) Arguments passed on to \code{\link[=set_cache]{set_cache}} \describe{ \item{\code{cache_dir}}{the directory in which the cache is stored. By default, this -is \code{tools::R_user_dir()} if on R 4.0+, but must be specified for earlier -versions of R. The path can be either relative or absolute. The -environmental variable is \code{EPIDATR_CACHE_DIR}.} +is \code{rappdirs::user_cache_dir("R", version = "epidatr")}. The path can be +either relative or absolute. The environmental variable is +\code{EPIDATR_CACHE_DIR}.} \item{\code{days}}{the maximum length of time in days to keep any particular cached call. By default this is \code{1}. The environmental variable is \code{EPIDATR_CACHE_MAX_AGE_DAYS}.} diff --git a/man/set_cache.Rd b/man/set_cache.Rd index a3b42a1e..72bc0847 100644 --- a/man/set_cache.Rd +++ b/man/set_cache.Rd @@ -14,9 +14,9 @@ set_cache( } \arguments{ \item{cache_dir}{the directory in which the cache is stored. By default, this -is \code{tools::R_user_dir()} if on R 4.0+, but must be specified for earlier -versions of R. The path can be either relative or absolute. The -environmental variable is \code{EPIDATR_CACHE_DIR}.} +is \code{rappdirs::user_cache_dir("R", version = "epidatr")}. The path can be +either relative or absolute. The environmental variable is +\code{EPIDATR_CACHE_DIR}.} \item{days}{the maximum length of time in days to keep any particular cached call. By default this is \code{1}. The environmental variable is