Merge pull request #46 from Moohan/feature/get_dataset_filter

Add filtering options to `get_dataset`
Public-Health-Scotland · Nov 14, 2024 · 3898526 · 3898526
2 parents b0b810c + 29f88f5
commit 3898526
Show file tree

Hide file tree

Showing 8 changed files with 140 additions and 28 deletions.
diff --git a/R/get_dataset.R b/R/get_dataset.R
@@ -21,6 +21,8 @@
 get_dataset <- function(dataset_name,
                         max_resources = NULL,
                         rows = NULL,
+                        row_filters = NULL,
+                        col_select = NULL,
                         include_context = FALSE) {
   # throw error if name type/format is invalid
   check_dataset_name(dataset_name)
@@ -50,7 +52,9 @@ get_dataset <- function(dataset_name,
   all_data <- purrr::map(
     selection_ids,
     get_resource,
-    rows = rows
+    rows = rows,
+    row_filters = row_filters,
+    col_select = col_select,
   )
 
   # resolve class issues

diff --git a/R/parse_col_select.R b/R/parse_col_select.R
@@ -8,7 +8,9 @@ parse_col_select <- function(col_select) {
     return(NULL)
   }
 
-  return(
-    paste0(col_select, collapse = ",")
-  )
+  if (!inherits(col_select, "character")) {
+    cli::cli_abort("{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.")
+  }
+
+  return(paste0(col_select, collapse = ","))
 }
diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
@@ -1,37 +1,44 @@
 #' Create JSON 'dict' from named list or vector
 #' @description Formats a list or named vector into a valid query
-#' @param row_filters list or named vectors matching fileds to values
+#' @param row_filters list or named vectors matching fields to values
 #' @return a json as a character string
 parse_row_filters <- function(row_filters) {
   # exit function if no filters
   if (is.null(row_filters)) {
     return(NULL)
   }
 
+  # Check if `row_filters` is a list or a character or numeric vector
+  if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) {
+    cli::cli_abort(
+      "{.arg row_filters} must be a named {.cls list} or a named
+      {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}."
+    )
+  }
+
+  # Ensure it's elements are named
+  if (is.null(names(row_filters)) || any(names(row_filters) == "")) {
+    cli::cli_abort("{.arg row_filters} should be a named {.cls list}.")
+  }
+
   # check if any filters in list have length > 1
-  too_many <- sapply(row_filters, length) > 1
+  too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1)
 
   if (any(too_many)) {
     cli::cli_abort(c(
-      "Invalid input for {.var row_filters}",
-      i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ",
-      x = "The {.var row_filters} list must only contain vectors of length 1."
+      "Invalid input for {.arg row_filters}",
+      i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.",
+      x = "The {.arg row_filters} list must only contain vectors of length 1."
     ))
   }
 
-  # check if any items in the list/vector have the same name
-  # find number of unique names
-  n_u_row_filters <- length(unique(names(row_filters)))
-  # find total number of names
-  n_row_filters <- length(names(row_filters))
-  # if same, all names are unique
-  unique_names <- n_u_row_filters == n_row_filters
-
-  if (!unique_names) {
+  # check if any items in the list/vector are duplicates
+  duplicates <- duplicated(names(row_filters))
+  if (any(duplicates)) {
     cli::cli_abort(c(
-      "Invalid input for {.var row_filters}",
-      x = "One or more elements in {.var row_filters} have the same name.",
-      i = "Only one filter per field is currently supported by `get_resource`."
+      "Invalid input for {.arg row_filters}",
+      x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
+      i = "Only one filter per field is currently supported by {.fun get_resource}."
     ))
   }
 

diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd
diff --git a/man/parse_row_filters.Rd b/man/parse_row_filters.Rd
diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R
@@ -1,6 +1,6 @@
 skip_if_offline(host = "www.opendata.nhs.scot")
 
-test_that("returns data in the expected format", {
+test_that("get_dataset returns data in the expected format", {
   n_resources <- 2
   n_rows <- 2
   data <- get_dataset(
@@ -15,7 +15,25 @@ test_that("returns data in the expected format", {
   expect_named(data)
 })
 
-test_that("errors properly", {
+test_that("get_dataset works properly with filters", {
+  n_resources <- 3
+  n_rows <- 10
+  columns <- c("Date", "PracticeCode", "HSCP", "AllAges")
+
+  data <- get_dataset("gp-practice-populations",
+    max_resources = n_resources,
+    rows = n_rows,
+    row_filters = list(HSCP = "S37000026"),
+    col_select = columns
+  )
+
+  expect_s3_class(data, "tbl_df")
+  expect_equal(nrow(data), n_resources * n_rows)
+  expect_named(data, columns)
+  expect_true(all(data[["HSCP"]] == "S37000026"))
+})
+
+test_that("get_dataset errors properly", {
   expect_error(get_dataset("Mal-formed-name"),
     regexp = "The dataset name supplied `Mal-formed-name` is invalid"
   )

diff --git a/tests/testthat/test-get_resource_dump.R b/tests/testthat/test-get_resource_dump.R
@@ -15,7 +15,7 @@ test_that("returns full data if rows is set to over 99999", {
     data <- get_resource(
       res_id = gp_list_apr_2021,
       rows = 9999999,
-      row_filters = c("GPPracticeName" = "The Blue Practice")
+      row_filters = list("GPPracticeName" = "The Blue Practice")
     ),
     regexp = "Can't request over 99,999 rows"
   )

diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R
@@ -4,24 +4,95 @@ test_that("returns NULL if `row_filters` = NULL", {
   )
 })
 
+test_that("throws error for bad types", {
+  expect_error(
+    parse_row_filters(mtcars),
+    regexp = " must be a named .+? not a "
+  )
+  expect_error(
+    parse_row_filters(c(x = TRUE)),
+    regexp = " must be a named .+? not a "
+  )
+  expect_error(
+    parse_row_filters(c(x = NA)),
+    regexp = " must be a named .+? not a "
+  )
+  expect_error(
+    parse_row_filters(c(x = as.Date("2000-01-01"))),
+    regexp = " must be a named .+? not a "
+  )
+})
+
 test_that("throws error for length > 1", {
   expect_error(
     parse_row_filters(list(x = letters)),
-    regexp = "(list must only contain vectors of length 1.)"
+    regexp = " has too many values\\."
+  )
+})
+
+test_that("throws error when un-named", {
+  expect_error(
+    parse_row_filters(list(1, 2)),
+    regexp = " should be a named "
+  )
+  expect_error(
+    parse_row_filters(list(a = 1, 2)),
+    regexp = " should be a named "
+  )
+  expect_error(
+    parse_row_filters(c(1, 2)),
+    regexp = " should be a named "
+  )
+  expect_error(
+    parse_row_filters(c(a = 1, 2)),
+    regexp = " should be a named "
   )
 })
 
 test_that("throws error for non-unique names", {
   expect_error(
     parse_row_filters(list(x = 1, x = 2)),
-    regexp = "Only one filter per field is currently supported by `get_resource`"
+    regexp = "Only one filter per field is currently supported"
+  )
+  expect_error(
+    parse_row_filters(c(x = 1, x = 2)),
+    regexp = "Only one filter per field is currently supported"
+  )
+})
+
+
+test_that("returns JSON string from a named vector", {
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(c(x = 5.0, y = 6.0))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(c(x = 5L, y = 6L))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(c(x = "a", y = "b"))
+    )
   )
 })
 
 test_that("returns JSON string from list", {
   expect_true(
     jsonlite::validate(
-      parse_row_filters(list(x = 5, y = 6))
+      parse_row_filters(list(x = 5.0, y = 6.0))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(list(x = 5L, y = 6L))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(list(x = "a", y = "b"))
     )
   )
 })