Skip to content

Commit

Permalink
Merge pull request #46 from Moohan/feature/get_dataset_filter
Browse files Browse the repository at this point in the history
Add filtering options to `get_dataset`
  • Loading branch information
csillasch authored Nov 14, 2024
2 parents b0b810c + 29f88f5 commit 3898526
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 28 deletions.
6 changes: 5 additions & 1 deletion R/get_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
get_dataset <- function(dataset_name,
max_resources = NULL,
rows = NULL,
row_filters = NULL,
col_select = NULL,
include_context = FALSE) {
# throw error if name type/format is invalid
check_dataset_name(dataset_name)
Expand Down Expand Up @@ -50,7 +52,9 @@ get_dataset <- function(dataset_name,
all_data <- purrr::map(
selection_ids,
get_resource,
rows = rows
rows = rows,
row_filters = row_filters,
col_select = col_select,
)

# resolve class issues
Expand Down
8 changes: 5 additions & 3 deletions R/parse_col_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ parse_col_select <- function(col_select) {
return(NULL)
}

return(
paste0(col_select, collapse = ",")
)
if (!inherits(col_select, "character")) {
cli::cli_abort("{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.")
}

return(paste0(col_select, collapse = ","))
}
41 changes: 24 additions & 17 deletions R/parse_row_filters.R
Original file line number Diff line number Diff line change
@@ -1,37 +1,44 @@
#' Create JSON 'dict' from named list or vector
#' @description Formats a list or named vector into a valid query
#' @param row_filters list or named vectors matching fileds to values
#' @param row_filters list or named vectors matching fields to values
#' @return a json as a character string
parse_row_filters <- function(row_filters) {
# exit function if no filters
if (is.null(row_filters)) {
return(NULL)
}

# Check if `row_filters` is a list or a character or numeric vector
if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) {
cli::cli_abort(
"{.arg row_filters} must be a named {.cls list} or a named
{.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}."
)
}

# Ensure it's elements are named
if (is.null(names(row_filters)) || any(names(row_filters) == "")) {
cli::cli_abort("{.arg row_filters} should be a named {.cls list}.")
}

# check if any filters in list have length > 1
too_many <- sapply(row_filters, length) > 1
too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1)

if (any(too_many)) {
cli::cli_abort(c(
"Invalid input for {.var row_filters}",
i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ",
x = "The {.var row_filters} list must only contain vectors of length 1."
"Invalid input for {.arg row_filters}",
i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.",
x = "The {.arg row_filters} list must only contain vectors of length 1."
))
}

# check if any items in the list/vector have the same name
# find number of unique names
n_u_row_filters <- length(unique(names(row_filters)))
# find total number of names
n_row_filters <- length(names(row_filters))
# if same, all names are unique
unique_names <- n_u_row_filters == n_row_filters

if (!unique_names) {
# check if any items in the list/vector are duplicates
duplicates <- duplicated(names(row_filters))
if (any(duplicates)) {
cli::cli_abort(c(
"Invalid input for {.var row_filters}",
x = "One or more elements in {.var row_filters} have the same name.",
i = "Only one filter per field is currently supported by `get_resource`."
"Invalid input for {.arg row_filters}",
x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
i = "Only one filter per field is currently supported by {.fun get_resource}."
))
}

Expand Down
10 changes: 10 additions & 0 deletions man/get_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/parse_row_filters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 20 additions & 2 deletions tests/testthat/test-get_dataset.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
skip_if_offline(host = "www.opendata.nhs.scot")

test_that("returns data in the expected format", {
test_that("get_dataset returns data in the expected format", {
n_resources <- 2
n_rows <- 2
data <- get_dataset(
Expand All @@ -15,7 +15,25 @@ test_that("returns data in the expected format", {
expect_named(data)
})

test_that("errors properly", {
test_that("get_dataset works properly with filters", {
n_resources <- 3
n_rows <- 10
columns <- c("Date", "PracticeCode", "HSCP", "AllAges")

data <- get_dataset("gp-practice-populations",
max_resources = n_resources,
rows = n_rows,
row_filters = list(HSCP = "S37000026"),
col_select = columns
)

expect_s3_class(data, "tbl_df")
expect_equal(nrow(data), n_resources * n_rows)
expect_named(data, columns)
expect_true(all(data[["HSCP"]] == "S37000026"))
})

test_that("get_dataset errors properly", {
expect_error(get_dataset("Mal-formed-name"),
regexp = "The dataset name supplied `Mal-formed-name` is invalid"
)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-get_resource_dump.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_that("returns full data if rows is set to over 99999", {
data <- get_resource(
res_id = gp_list_apr_2021,
rows = 9999999,
row_filters = c("GPPracticeName" = "The Blue Practice")
row_filters = list("GPPracticeName" = "The Blue Practice")
),
regexp = "Can't request over 99,999 rows"
)
Expand Down
77 changes: 74 additions & 3 deletions tests/testthat/test-parse_row_filters.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,95 @@ test_that("returns NULL if `row_filters` = NULL", {
)
})

test_that("throws error for bad types", {
expect_error(
parse_row_filters(mtcars),
regexp = " must be a named .+? not a "
)
expect_error(
parse_row_filters(c(x = TRUE)),
regexp = " must be a named .+? not a "
)
expect_error(
parse_row_filters(c(x = NA)),
regexp = " must be a named .+? not a "
)
expect_error(
parse_row_filters(c(x = as.Date("2000-01-01"))),
regexp = " must be a named .+? not a "
)
})

test_that("throws error for length > 1", {
expect_error(
parse_row_filters(list(x = letters)),
regexp = "(list must only contain vectors of length 1.)"
regexp = " has too many values\\."
)
})

test_that("throws error when un-named", {
expect_error(
parse_row_filters(list(1, 2)),
regexp = " should be a named "
)
expect_error(
parse_row_filters(list(a = 1, 2)),
regexp = " should be a named "
)
expect_error(
parse_row_filters(c(1, 2)),
regexp = " should be a named "
)
expect_error(
parse_row_filters(c(a = 1, 2)),
regexp = " should be a named "
)
})

test_that("throws error for non-unique names", {
expect_error(
parse_row_filters(list(x = 1, x = 2)),
regexp = "Only one filter per field is currently supported by `get_resource`"
regexp = "Only one filter per field is currently supported"
)
expect_error(
parse_row_filters(c(x = 1, x = 2)),
regexp = "Only one filter per field is currently supported"
)
})


test_that("returns JSON string from a named vector", {
expect_true(
jsonlite::validate(
parse_row_filters(c(x = 5.0, y = 6.0))
)
)
expect_true(
jsonlite::validate(
parse_row_filters(c(x = 5L, y = 6L))
)
)
expect_true(
jsonlite::validate(
parse_row_filters(c(x = "a", y = "b"))
)
)
})

test_that("returns JSON string from list", {
expect_true(
jsonlite::validate(
parse_row_filters(list(x = 5, y = 6))
parse_row_filters(list(x = 5.0, y = 6.0))
)
)
expect_true(
jsonlite::validate(
parse_row_filters(list(x = 5L, y = 6L))
)
)
expect_true(
jsonlite::validate(
parse_row_filters(list(x = "a", y = "b"))
)
)
})

0 comments on commit 3898526

Please sign in to comment.