diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index a7539a6..167b559 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -41,7 +41,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.6.8 + uses: JamesIves/github-pages-deploy-action@v4.7.2 with: clean: false branch: gh-pages diff --git a/R/check_dataset_name.R b/R/check_dataset_name.R index 4033888..2217a3d 100644 --- a/R/check_dataset_name.R +++ b/R/check_dataset_name.R @@ -6,27 +6,33 @@ #' @param dataset_name a resource ID #' @keywords internal #' @noRd -check_dataset_name <- function(dataset_name) { +check_dataset_name <- function(dataset_name, call = rlang::caller_env()) { # Starts and ends in a lowercase letter or number # Has only lowercase alphanum or hyphens inbetween dataset_name_regex <- "^[a-z0-9][a-z0-9\\-]+?[a-z0-9]$" if (!inherits(dataset_name, "character")) { - cli::cli_abort(c( - "The dataset name supplied {.var {dataset_name}} is invalid.", - "x" = "dataset_name must be of type character.", - "i" = "You supplied a {.cls {class(dataset_name)[0]}} value." - )) + cli::cli_abort( + c( + "The dataset name supplied {.var {dataset_name}} is invalid.", + "x" = "dataset_name must be of type character.", + "i" = "You supplied a {.cls {class(dataset_name)[0]}} value." + ), + call = call + ) } if (!grepl(dataset_name_regex, dataset_name)) { - cli::cli_abort(c( - "The dataset name supplied {.var {dataset_name}} is invalid", - "x" = "dataset_name must be in dash-case + cli::cli_abort( + c( + "The dataset name supplied {.var {dataset_name}} is invalid", + "x" = "dataset_name must be in dash-case (e.g., lowercase-words-separated-by-dashes).", - "i" = "You can find dataset names in the URL + "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." - )) + ), + call = call + ) } } diff --git a/R/check_res_id.R b/R/check_res_id.R index 4e1b8c2..3bf4504 100644 --- a/R/check_res_id.R +++ b/R/check_res_id.R @@ -8,32 +8,41 @@ #' @return TRUE / FALSE indicating the validity of the res_id #' @keywords internal #' @noRd -check_res_id <- function(res_id) { +check_res_id <- function(res_id, call = rlang::caller_env()) { # check res_id is single value if (length(res_id) > 1) { - cli::cli_abort(c( - "Argument {.var res_id} must be of length 1.", - i = "You supplied a res_id with a length of {length(res_id)}", - x = "`get_resource` does not currently support + cli::cli_abort( + c( + "Argument {.var res_id} must be of length 1.", + i = "You supplied a res_id with a length of {length(res_id)}", + x = "`get_resource` does not currently support requests for multiple resources simultaneously." - )) + ), + call = call + ) } # check res_id is character if (!inherits(res_id, "character")) { - cli::cli_abort(c( - "Argument {.var res_id} must be of type character", - i = "You supplied a {.var res_id} with type {.cls {class(res_id)[1]}}" - )) + cli::cli_abort( + c( + "Argument {.var res_id} must be of type character", + i = "You supplied a {.var res_id} with type {.cls {class(res_id)[1]}}" + ), + call = call + ) } # check regex pattern res_id_regex <- "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" if (!grepl(res_id_regex, res_id)) { - cli::cli_abort(c( - "Argument {.var res_id} is in an invalid format.", - i = "You can find a resource's ID in the URL of it's page on {.url www.opendata.nhs.scot}." - )) + cli::cli_abort( + c( + "Argument {.var res_id} is in an invalid format.", + i = "You can find a resource's ID in the URL of it's page on {.url www.opendata.nhs.scot}." + ), + call = call + ) } } diff --git a/R/dump_download.R b/R/dump_download.R index 3e3a5f3..bb07180 100644 --- a/R/dump_download.R +++ b/R/dump_download.R @@ -4,7 +4,7 @@ #' @return dataframe containing resource records #' @keywords internal #' @noRd -dump_download <- function(res_id) { +dump_download <- function(res_id, call = rlang::caller_env()) { # fetch the data content <- suppressMessages( phs_GET("dump", res_id) @@ -12,9 +12,12 @@ dump_download <- function(res_id) { # if content is a web page if ("xml_document" %in% class(content)) { - cli::cli_abort(c( - "Can't find resource with ID {.var {res_id}} in datastore." - )) + cli::cli_abort( + c( + "Can't find resource with ID {.var {res_id}} in datastore." + ), + call = call + ) } # return data diff --git a/R/error_check.R b/R/error_check.R index 46d6576..7960008 100644 --- a/R/error_check.R +++ b/R/error_check.R @@ -3,14 +3,17 @@ #' @param content object produced by `httr::content` #' @keywords internal #' @noRd -error_check <- function(content) { +error_check <- function(content, call = rlang::caller_env()) { # if content is not a list, # stop for content (a string describing an error) if (!is.list(content)) { - cli::cli_abort(c( - "API error", - x = content - )) + cli::cli_abort( + c( + "API error", + x = content + ), + call = call + ) } # if there is no error status/message in the content, @@ -24,8 +27,11 @@ error_check <- function(content) { # generate error message and stop error_text <- parse_error(content$error) - cli::cli_abort(c( - "API error.", - x = error_text - )) + cli::cli_abort( + c( + "API error.", + x = error_text + ), + call = call + ) } diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index d8b6e81..fbb1047 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -17,7 +17,7 @@ #' @return a string with the resource id #' @keywords internal #' @noRd -get_latest_resource_id <- function(dataset_name) { +get_latest_resource_id <- function(dataset_name, call = rlang::caller_env()) { # send the api request query <- list("id" = dataset_name) content <- phs_GET("package_show", query) @@ -53,5 +53,7 @@ get_latest_resource_id <- function(dataset_name) { if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) { return(all_id_data_first_row$id) } - cli::cli_abort("The most recent id could not be identified") + cli::cli_abort("The most recent id could not be identified", + call = call + ) } diff --git a/R/get_resource.R b/R/get_resource.R index 743039b..6bda6a6 100644 --- a/R/get_resource.R +++ b/R/get_resource.R @@ -40,12 +40,43 @@ get_resource <- function(res_id, # check res_id check_res_id(res_id) + parsed_col_select <- parse_col_select(col_select) + parsed_row_filters <- parse_row_filters(row_filters) + + if (is.logical(parsed_row_filters) && !parsed_row_filters) { + if (!is.null(row_filters)) { + col_select_sql <- dplyr::if_else( + is.null(col_select), + "*", + paste0("\"", paste(col_select, collapse = "\",\""), "\"") + ) + + row_filters_sql <- paste( + purrr::imap_chr( + row_filters, + function(value, col) paste0("\"", col, "\"=\'", value, "\'", collapse = " OR ") + ), + collapse = ") AND (" + ) + + sql <- sprintf( + "SELECT %s FROM \"%s\" WHERE (%s) %s", + col_select_sql, + res_id, + row_filters_sql, + dplyr::if_else(is.null(rows), "", paste("LIMIT", rows)) + ) + + return(get_resource_sql(sql)) + } + } + # define query query <- list( id = res_id, limit = rows, - q = parse_row_filters(row_filters), - fields = parse_col_select(col_select) + q = parsed_row_filters, + fields = parsed_col_select ) # if dump should be used, use it diff --git a/R/parse_col_select.R b/R/parse_col_select.R index f2e76f3..c218fee 100644 --- a/R/parse_col_select.R +++ b/R/parse_col_select.R @@ -5,13 +5,16 @@ #' @return a character string #' @keywords internal #' @noRd -parse_col_select <- function(col_select) { +parse_col_select <- function(col_select, call = rlang::caller_env()) { if (is.null(col_select)) { return(NULL) } if (!inherits(col_select, "character")) { - cli::cli_abort("{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.") + cli::cli_abort( + "{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.", + call = call + ) } return(paste0(col_select, collapse = ",")) diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index 4144d3c..594b1b1 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -4,7 +4,7 @@ #' @return a json as a character string #' @keywords internal #' @noRd -parse_row_filters <- function(row_filters) { +parse_row_filters <- function(row_filters, call = rlang::caller_env()) { # exit function if no filters if (is.null(row_filters)) { return(NULL) @@ -14,34 +14,39 @@ parse_row_filters <- function(row_filters) { if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) { cli::cli_abort( "{.arg row_filters} must be a named {.cls list} or a named - {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}." + {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}.", + call = call ) } # Ensure it's elements are named if (is.null(names(row_filters)) || any(names(row_filters) == "")) { - cli::cli_abort("{.arg row_filters} should be a named {.cls list}.") + cli::cli_abort( + "{.arg row_filters} should be a named {.cls list}.", + call = call + ) } - # check if any filters in list have length > 1 - too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1) - - if (any(too_many)) { - cli::cli_abort(c( - "Invalid input for {.arg row_filters}", - i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.", - x = "The {.arg row_filters} list must only contain vectors of length 1." - )) - } # check if any items in the list/vector are duplicates duplicates <- duplicated(names(row_filters)) if (any(duplicates)) { - cli::cli_abort(c( - "Invalid input for {.arg row_filters}", - x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.", - i = "Only one filter per field is currently supported by {.fun get_resource}." - )) + cli::cli_abort( + c( + "Invalid input for {.arg row_filters}", + x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.", + i = "Only one filter per field is currently supported by {.fun get_resource}." + ), + call = call + ) + } + + # check if any filters in list have length > 1 + multiple <- purrr::map_lgl(row_filters, ~ length(.x) > 1) + + if (any(multiple)) { + # Default to using SQL + return(FALSE) } filter_body <- paste0( @@ -49,7 +54,5 @@ parse_row_filters <- function(row_filters) { collapse = "," ) - return( - paste0("{", filter_body, "}") - ) + return(paste0("{", filter_body, "}")) } diff --git a/R/phs_GET.R b/R/phs_GET.R index d59a340..ac27318 100644 --- a/R/phs_GET.R +++ b/R/phs_GET.R @@ -5,7 +5,7 @@ #' @return content of a httr::GET request #' @keywords internal #' @noRd -phs_GET <- function(action, query, verbose = FALSE) { +phs_GET <- function(action, query, verbose = FALSE, call = rlang::caller_env()) { # define URL url <- request_url(action, query) @@ -21,10 +21,13 @@ phs_GET <- function(action, query, verbose = FALSE) { # Check for a response from the server if (!inherits(response, "response")) { - cli::cli_abort(c( - "Can't connect to the CKAN server.", - i = "Check your network/proxy settings." - )) + cli::cli_abort( + c( + "Can't connect to the CKAN server.", + i = "Check your network/proxy settings." + ), + call = call + ) } # Extract the content from the HTTP response @@ -33,7 +36,7 @@ phs_GET <- function(action, query, verbose = FALSE) { ) # detect/handle errors - error_check(content) + error_check(content, call = call) if (verbose) cat("GET request successful.\n") return(content) diff --git a/R/request_url.R b/R/request_url.R index 856c53b..c8022d0 100644 --- a/R/request_url.R +++ b/R/request_url.R @@ -5,7 +5,7 @@ #' @return a URL as a character string #' @keywords internal #' @noRd -request_url <- function(action, query) { +request_url <- function(action, query, call = rlang::caller_env()) { # check action is valid valid_actions <- c( "datastore_search", @@ -16,10 +16,13 @@ request_url <- function(action, query) { "resource_show" ) if (!(action %in% valid_actions)) { - cli::cli_abort(c( - "API call failed.", - x = "{.val {action}} is an invalid {.arg action} argument." - )) + cli::cli_abort( + c( + "API call failed.", + x = "{.val {action}} is an invalid {.arg action} argument." + ), + call = call + ) } base_url <- "https://www.opendata.nhs.scot" diff --git a/R/suggest_dataset_name.R b/R/suggest_dataset_name.R index 13d124a..17d6561 100644 --- a/R/suggest_dataset_name.R +++ b/R/suggest_dataset_name.R @@ -4,7 +4,7 @@ #' @param dataset_name a string to be matched against valid dataset names #' @keywords internal #' @noRd -suggest_dataset_name <- function(dataset_name) { +suggest_dataset_name <- function(dataset_name, call = rlang::caller_env()) { content <- phs_GET("package_list", "") dataset_names <- unlist(content$result) @@ -14,20 +14,26 @@ suggest_dataset_name <- function(dataset_name) { # if min distance is too big, abort if (min(string_distances) > 10) { - cli::cli_abort(c( - "Can't find the dataset name + cli::cli_abort( + c( + "Can't find the dataset name {.var {dataset_name}}, or a close match.", - i = "Find a dataset's name in the URL + i = "Find a dataset's name in the URL of its page on {.url www.opendata.nhs.scot.}" - )) + ), + call = call + ) } # find closet match closest_match <- dataset_names[which(string_distances == min(string_distances))] # throw error with suggestion - cli::cli_abort(c( - "Can't find the dataset name {.var {dataset_name}}.", - "i" = "Did you mean {?any of }{.val {closest_match}}?" - )) + cli::cli_abort( + c( + "Can't find the dataset name {.var {dataset_name}}.", + "i" = "Did you mean {?any of }{.val {closest_match}}?" + ), + call = call + ) } diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R index 4bf1139..2029c53 100644 --- a/tests/testthat/test-get_dataset.R +++ b/tests/testthat/test-get_dataset.R @@ -50,3 +50,19 @@ test_that("get_dataset filters error properly", { regexp = "API error" ) }) + +test_that("get_dataset works with multiple filters", { + n_resources <- 3 + columns <- c("Date", "PracticeCode", "HSCP", "AllAges") + + data <- get_dataset("gp-practice-populations", + max_resources = n_resources, + row_filters = list(PracticeCode = c("10002", "10017")), + col_select = columns + ) + + expect_s3_class(data, "tbl_df") + expect_equal(nrow(data), n_resources * 6) + expect_named(data, columns) + expect_true(all(data[["PracticeCode"]] %in% c("10002", "10017"))) +}) diff --git a/tests/testthat/test-get_resource.R b/tests/testthat/test-get_resource.R index 128a0ce..e5917e1 100644 --- a/tests/testthat/test-get_resource.R +++ b/tests/testthat/test-get_resource.R @@ -30,3 +30,97 @@ test_that("returns data with row specifications", { expect_equal(nrow(get_resource(res_id = gp_list_apr_2021, rows = 999)), 926) %>% expect_warning() }) + +test_that("returns data for multiple filters", { + data_row_limit <- get_resource( + res_id = "e4985a62-9d59-4e71-8800-3f7ca29ffe0c", + col_select = c("GPPractice", "DMDCode"), + row_filters = list("GPPractice" = c("80005", "80202")), + rows = 100 + ) + + expect_s3_class(data_row_limit, "tbl_df") + expect_equal(nrow(data_row_limit), 100) + expect_named(data_row_limit, c("GPPractice", "DMDCode")) + + data_full <- get_resource( + res_id = "e4985a62-9d59-4e71-8800-3f7ca29ffe0c", + col_select = c("GPPractice", "DMDCode", "PrescribedType"), + row_filters = list( + "GPPractice" = c("80005", "80202"), + "PrescribedType" = "AMP" + ) + ) + + expect_s3_class(data_full, "tbl_df") + expect_equal(nrow(data_full), 1108) + expect_named(data_full, c("GPPractice", "DMDCode", "PrescribedType")) + expect_length(unique(data_full$GPPractice), 2) + expect_length(unique(data_full$PrescribedType), 1) +}) + +test_that("returns data for multiple filters in mixed format", { + delays <- get_resource( + res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215", + col_select = c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"), + row_filters = list("HBT" = "S08000031", MonthOfDelay = c(201607:201707)) + ) + + expect_s3_class(delays, "tbl_df") + expect_equal(nrow(delays), 195) + expect_named(delays, c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays")) + expect_length(unique(delays$MonthOfDelay), 13) +}) + +test_that("returns data for multiple filters for all columns", { + prescriptions <- get_resource( + res_id = "d1fbede3-98c4-436e-9e75-2ed807a36075", + row_filters = list( + "HBT" = "S08000015", + "DMDCode" = c("940711000001101", "1004511000001101", "1014311000001109") + ) + ) + + expect_s3_class(prescriptions, "tbl_df") + expect_equal(nrow(prescriptions), 114) + expect_named(prescriptions, c( + "HBT", + "GPPractice", + "DMDCode", + "BNFItemCode", + "BNFItemDescription", + "PrescribedType", + "NumberOfPaidItems", + "PaidQuantity", + "GrossIngredientCost", + "PaidDateMonth" + )) + expect_length(unique(prescriptions$GPPractice), 55) + expect_setequal( + prescriptions$DMDCode, + c("940711000001101", "1004511000001101", "1014311000001109") + ) + expect_setequal(prescriptions$HBT, "S08000015") +}) + +test_that("errors on invalid filters", { + # non-existent column in row_filters + expect_error( + delays <- get_resource( + res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215", + col_select = c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"), + row_filters = c("HBT" = "S08000031", "Month" = 201607) + ), + regexp = "row_filters: invalid value" + ) + + # non-existent column in col_select + expect_error( + delays <- get_resource( + res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215", + col_select = c("Month", "ReasonForDelay", "NumberOfDelayedBedDays"), + row_filters = c("HBT" = "S08000031", "MonthOfDelay" = 201607) + ), + regexp = "col_select: invalid value" + ) +}) diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R index 90558d5..62f9b5e 100644 --- a/tests/testthat/test-parse_row_filters.R +++ b/tests/testthat/test-parse_row_filters.R @@ -23,10 +23,9 @@ test_that("throws error for bad types", { ) }) -test_that("throws error for length > 1", { - expect_error( +test_that("returns FALSE for length > 1", { + expect_false( parse_row_filters(list(x = letters)), - regexp = " has too many values\\." ) })