Merge pull request #32 from Public-Health-Scotland/issue10

Issue10
Public-Health-Scotland · Aug 29, 2024 · e8196b1 · e8196b1
2 parents 03471e5 + c1be162
commit e8196b1
Show file tree

Hide file tree

Showing 12 changed files with 213 additions and 1 deletion.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,7 +4,8 @@ Version: 0.1.0.9000
 Authors@R: c(
     person("Csilla", "Scharle", , "[email protected]", role = c("aut", "cre")),
     person("James", "McMahon", , "[email protected]", role = "aut"),
-    person("David", "Aikman", , "[email protected]", role = "aut")
+    person("David", "Aikman", , "[email protected]", role = "aut"),
+    person("Ross", "Hull", , "[email protected]", role = "aut")
   )
 Description: Functions to extract and interact with data from the Scottish
     Health and Social Care Open Data platform.

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,7 +2,10 @@
 
 export("%>%")
 export(get_dataset)
+export(get_dataset_additional_info)
 export(get_latest_resource)
 export(get_resource)
 export(get_resource_sql)
+export(list_datasets)
+export(list_resources)
 importFrom(magrittr,"%>%")
diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,8 @@ dates to returned data (#24).
 - `get_dataset()` will now suggest multiple dataset names, when the dataset 
 you've asked for doesn't exist (i.e. there's a typo) and there are multiple 
 likely candidates  (#28).
+- Two new functions `list_datasets()` and `list_resources()` allow browsing
+available datasets and resources (#10).
 
 # phsopendata 0.1.0 (2021-07-22)
 

diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R
@@ -0,0 +1,43 @@
+#' Get a dataset's additional info
+#'
+#' `get_dataset_additional_info()` returns a tibble with the dataset name,
+#' the number of resources it has and the date it was last updated.
+#' Last updated is taken to mean the most recent date a resource within the
+#' dataset was created or modified.
+#'
+#' @inheritParams get_dataset
+#'
+#' @return a [tibble][tibble::tibble-package] with the columns `name`,
+#'   `n_resources` and `last_updated` (`NA` when no resource has a date)
+#' @export
+#' @examples
+#' get_dataset_additional_info("gp-practice-populations")
+get_dataset_additional_info <- function(dataset_name) {
+  # define query and fetch the dataset's metadata
+  query <- list("id" = dataset_name)
+  content <- phs_GET("package_show", query)

+  # keep the resource list; its length is the number of resources
+  resources <- content$result$resources

+  # get the resource created and modified dates; a missing or NULL date
+  # becomes NA rather than making `map_chr()` throw an error
+  created <- purrr::map_chr(resources, "created", .default = NA_character_)
+  modified <- purrr::map_chr(
+    resources, "last_modified",
+    .default = NA_character_
+  )

+  # the "%FT%X" style timestamps sort chronologically as text, so take the
+  # latest string; with no dated resource at all use NA instead of erroring
+  all_dates <- c(created, modified)
+  all_dates <- all_dates[!is.na(all_dates)]
+  latest_date <- if (length(all_dates) > 0) max(all_dates) else NA_character_

+  # create the tibble to return, converting the date to a UTC datetime
+  tibble::tibble(
+    "name" = dataset_name,
+    "n_resources" = length(resources),
+    "last_updated" = as.POSIXct(latest_date, format = "%FT%X", tz = "UTC")
+  )
+}
diff --git a/R/list_datasets.R b/R/list_datasets.R
@@ -0,0 +1,18 @@
+#' List all available datasets
+#'
+#' `list_datasets()` returns the names of all of the datasets hosted on the
+#' PHS open data platform.
+#'
+#' @return A [tibble][tibble::tibble-package] with a `name` column.
+#' @export
+#'
+#' @examples
+#' head(list_datasets())
+list_datasets <- function() {
+  # ask the API for the full package (dataset) list
+  response <- phs_GET("package_list", "")

+  # flatten the returned names into a single column
+  dataset_names <- unlist(response$result)
+  tibble::tibble("name" = dataset_names)
+}
diff --git a/R/list_resources.R b/R/list_resources.R
@@ -0,0 +1,43 @@
+#' Lists all available resources for a dataset
+#'
+#' `list_resources()` returns all of the resources associated
+#' with a dataset
+#'
+#' @inheritParams get_dataset
+#'
+#' @return a [tibble][tibble::tibble-package] with one row per resource and
+#'   the columns `res_id`, `name`, `created` and `last_modified`
+#' @export
+#'
+#' @examples
+#' list_resources("weekly-accident-and-emergency-activity-and-waiting-times")
+list_resources <- function(dataset_name) {
+  # throw error if name type/format is invalid
+  check_dataset_name(dataset_name)

+  # define query and try API call
+  query <- list("id" = dataset_name)
+  content <- tryCatch(
+    phs_GET("package_show", query),
+    error = function(e) {
+      # a 'Not Found Error' means the dataset doesn't exist, so throw an
+      # error with suggested dataset names; re-throw any other error as is
+      # rather than failing later with an unrelated message
+      if (grepl("Not Found Error", conditionMessage(e))) {
+        suggest_dataset_name(dataset_name)
+      }
+      stop(e)
+    }
+  )

+  # extract each field, using NA where a resource doesn't have it
+  resources <- content$result$resources
+  field <- function(x) purrr::map_chr(resources, x, .default = NA_character_)
+  as_datetime <- function(x) as.POSIXct(x, format = "%FT%X", tz = "UTC")

+  tibble::tibble(
+    "res_id" = field("id"), "name" = field("name"),
+    "created" = as_datetime(field("created")),
+    "last_modified" = as_datetime(field("last_modified"))
+  )
+}
diff --git a/man/get_dataset_additional_info.Rd b/man/get_dataset_additional_info.Rd
diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd
diff --git a/man/list_resources.Rd b/man/list_resources.Rd
diff --git a/tests/testthat/test-get_dataset_additional_info.R b/tests/testthat/test-get_dataset_additional_info.R
@@ -0,0 +1,9 @@
+skip_if_offline(host = "www.opendata.nhs.scot")

+test_that("returns data in the expected format", {
+  # one dataset should come back as a single row with a fixed set of columns
+  info <- get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times")
+  expect_s3_class(info, "tbl_df")
+  expect_equal(nrow(info), 1)
+  expect_named(info, c("name", "n_resources", "last_updated"))
+})
diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R
@@ -0,0 +1,13 @@
+skip_if_offline(host = "www.opendata.nhs.scot")

+test_that("returns more than 0 datasets", {
+  # expect at least one dataset to be listed
+  expect_gte(nrow(list_datasets()), 1)
+})

+test_that("returns data in the expected format", {
+  datasets <- list_datasets()
+  expect_s3_class(datasets, "tbl_df")
+  expect_named(datasets, "name")
+  expect_equal(dplyr::n_distinct(datasets[["name"]]), nrow(datasets))
+})
diff --git a/tests/testthat/test-list_resources.R b/tests/testthat/test-list_resources.R
@@ -0,0 +1,17 @@
+skip_if_offline(host = "www.opendata.nhs.scot")

+test_that("returns data in the expected format", {
+  # resource IDs and names should both be unique within a dataset
+  resources <- list_resources("diagnostic-waiting-times")
+  expect_s3_class(resources, "tbl_df")
+  expect_named(resources, c("res_id", "name", "created", "last_modified"))
+  expect_equal(dplyr::n_distinct(resources[["res_id"]]), nrow(resources))
+  expect_equal(dplyr::n_distinct(resources[["name"]]), nrow(resources))
+})

+test_that("returns errors properly", {
+  expect_error(list_resources(), "argument \"dataset_name\" is missing, with no default$")
+  expect_error(list_resources("bad_name"), "dataset_name must be in dash-case")
+  expect_error(list_resources("incorrect-name"), "Can't find the dataset name")
+  expect_error(list_resources("diagnostic-waiting-time"), "diagnostic-waiting-times")
+})