Skip to content

Commit

Permalink
Merge pull request #32 from Public-Health-Scotland/issue10
Browse files Browse the repository at this point in the history
Issue10
  • Loading branch information
csillasch authored Aug 29, 2024
2 parents 03471e5 + c1be162 commit e8196b1
Show file tree
Hide file tree
Showing 12 changed files with 213 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ Version: 0.1.0.9000
Authors@R: c(
person("Csilla", "Scharle", , "[email protected]", role = c("aut", "cre")),
person("James", "McMahon", , "[email protected]", role = "aut"),
person("David", "Aikman", , "[email protected]", role = "aut")
person("David", "Aikman", , "[email protected]", role = "aut"),
person("Ross", "Hull", , "[email protected]", role = "aut")
)
Description: Functions to extract and interact with data from the Scottish
Health and Social Care Open Data platform.
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

export("%>%")
export(get_dataset)
export(get_dataset_additional_info)
export(get_latest_resource)
export(get_resource)
export(get_resource_sql)
export(list_datasets)
export(list_resources)
importFrom(magrittr,"%>%")
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ dates to returned data (#24).
- `get_dataset()` will now suggest multiple dataset names, when the dataset
you've asked for doesn't exist (i.e. there's a typo) and there are multiple
likely candidates (#28).
- Two new functions `list_datasets()` and `list_resources()` allow browsing
available datasets and resources (#10).

# phsopendata 0.1.0 (2021-07-22)

Expand Down
43 changes: 43 additions & 0 deletions R/get_dataset_additional_info.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Get a dataset's additional info
#'
#' `get_dataset_additional_info()` returns a tibble with the dataset name,
#' the number of resources it has and the date it was last updated. Last
#' updated is taken to mean the most recent date a resource within the dataset
#' was created or modified.
#'
#' @inheritParams get_dataset
#'
#' @return a [tibble][tibble::tibble-package] with one row per dataset name
#'   and the columns `name`, `n_resources` and `last_updated`. `last_updated`
#'   is `NA` when no resource of the dataset carries a usable date.
#' @export
#' @examples
#' get_dataset_additional_info("gp-practice-populations")
get_dataset_additional_info <- function(dataset_name) {
  # define query and fetch the dataset's metadata
  query <- list("id" = dataset_name)
  content <- phs_GET("package_show", query)

  resources <- content$result$resources

  # get the number of resources
  amount_of_resources <- length(resources)

  # get the created and modified dates of every resource; a field that is
  # NULL or absent becomes NA instead of making `map_chr()` abort
  resource_created_dates <- purrr::map_chr(
    resources,
    "created",
    .default = NA_character_
  )
  resource_modified_dates <- purrr::map_chr(
    resources,
    "last_modified",
    .default = NA_character_
  )

  # convert to datetime before comparing so the comparison is chronological
  # rather than a string comparison, then drop anything unusable
  resource_dates <- as.POSIXct(
    c(resource_modified_dates, resource_created_dates),
    format = "%FT%X",
    tz = "UTC"
  )
  resource_dates <- resource_dates[!is.na(resource_dates)]

  # get the latest of all created and modified dates; fall back to a missing
  # datetime when the dataset has no (dated) resources
  if (length(resource_dates) > 0) {
    most_recent_resource_date <- max(resource_dates)
  } else {
    most_recent_resource_date <- as.POSIXct(
      NA_character_,
      format = "%FT%X",
      tz = "UTC"
    )
  }

  # create tibble to return
  tibble::tibble(
    "name" = dataset_name,
    "n_resources" = amount_of_resources,
    "last_updated" = most_recent_resource_date
  )
}
18 changes: 18 additions & 0 deletions R/list_datasets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#' List all available datasets
#'
#' `list_datasets()` shows all of the datasets hosted on the PHS open data
#' platform.
#'
#' @return A tibble with a single column, `name`, holding the dataset names.
#' @export
#'
#' @examples
#' head(list_datasets())
list_datasets <- function() {
  # ask the API for the full list of packages (datasets)
  response <- phs_GET("package_list", "")

  # flatten the returned list of names into a vector
  dataset_names <- unlist(response$result)

  tibble::tibble("name" = dataset_names)
}
43 changes: 43 additions & 0 deletions R/list_resources.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' List all available resources for a dataset
#'
#' `list_resources()` returns all of the resources associated
#' with a dataset.
#'
#' @inheritParams get_dataset
#'
#' @return a [tibble][tibble::tibble-package] with one row per resource and
#'   the columns `res_id`, `name`, `created` and `last_modified`. The two date
#'   columns are datetimes in UTC; a date the API does not supply is `NA`.
#' @export
#'
#' @examples
#' list_resources("weekly-accident-and-emergency-activity-and-waiting-times")
list_resources <- function(dataset_name) {
  # throw error if name type/format is invalid
  check_dataset_name(dataset_name)

  # define query and try API call
  query <- list("id" = dataset_name)
  content <- try(
    phs_GET("package_show", query),
    silent = TRUE
  )

  if (inherits(content, "try-error")) {
    # if content contains a 'Not Found Error'
    # throw error with suggested dataset name
    if (grepl("Not Found Error", content[1])) {
      suggest_dataset_name(dataset_name)
    }

    # any other failure is re-signalled unchanged, so the caller sees the
    # real cause rather than a confusing error from the code below
    stop(attr(content, "condition"))
  }

  resources <- content$result$resources

  # define the resource IDs, names, dates created and dates modified
  # within the dataset
  all_ids <- purrr::map_chr(resources, "id")
  all_names <- purrr::map_chr(resources, "name")

  # a date that is NULL or absent becomes NA instead of making
  # `map_chr()` abort
  all_date_created <- purrr::map_chr(
    resources,
    "created",
    .default = NA_character_
  ) %>%
    as.POSIXct(format = "%FT%X", tz = "UTC")
  all_date_modified <- purrr::map_chr(
    resources,
    "last_modified",
    .default = NA_character_
  ) %>%
    as.POSIXct(format = "%FT%X", tz = "UTC")

  tibble::tibble(
    "res_id" = all_ids,
    "name" = all_names,
    "created" = all_date_created,
    "last_modified" = all_date_modified
  )
}
24 changes: 24 additions & 0 deletions man/get_dataset_additional_info.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/list_datasets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/list_resources.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/test-get_dataset_additional_info.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# these tests call the live API, so skip them when it can't be reached
skip_if_offline(host = "www.opendata.nhs.scot")

test_that("returns data in the expected format", {
  info <- get_dataset_additional_info(
    "weekly-accident-and-emergency-activity-and-waiting-times"
  )

  # a one-row tibble with exactly these columns, in this order
  expect_s3_class(info, "tbl_df")
  expect_equal(nrow(info), 1)
  expect_named(info, c("name", "n_resources", "last_updated"))
})
13 changes: 13 additions & 0 deletions tests/testthat/test-list_datasets.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# these tests call the live API, so skip them when it can't be reached
skip_if_offline(host = "www.opendata.nhs.scot")

test_that("returns more than 0 datasets", {
  n_datasets <- nrow(list_datasets())

  expect_gte(n_datasets, 1)
})

test_that("returns data in the expected format", {
  datasets <- list_datasets()

  expect_s3_class(datasets, "tbl_df")
  expect_named(datasets, "name")
  # every dataset name should appear only once
  expect_equal(dplyr::n_distinct(datasets[["name"]]), nrow(datasets))
})
17 changes: 17 additions & 0 deletions tests/testthat/test-list_resources.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# these tests call the live API, so skip them when it can't be reached
skip_if_offline(host = "www.opendata.nhs.scot")

test_that("returns data in the expected format", {
  resources <- list_resources("diagnostic-waiting-times")

  expect_s3_class(resources, "tbl_df")
  expect_named(resources, c("res_id", "name", "created", "last_modified"))
  # resource IDs and resource names should both be unique within a dataset
  expect_equal(dplyr::n_distinct(resources[["res_id"]]), nrow(resources))
  expect_equal(dplyr::n_distinct(resources[["name"]]), nrow(resources))
})

test_that("returns errors properly", {
  # no dataset name supplied
  expect_error(
    list_resources(),
    "argument \"dataset_name\" is missing, with no default$"
  )
  # dataset name in the wrong format
  expect_error(
    list_resources("bad_name"),
    "dataset_name must be in dash-case"
  )
  # well-formed dataset name that doesn't exist
  expect_error(
    list_resources("incorrect-name"),
    "Can't find the dataset name"
  )
  # near-miss dataset name: the error should mention the likely match
  expect_error(
    list_resources("diagnostic-waiting-time"),
    "diagnostic-waiting-times"
  )
})

0 comments on commit e8196b1

Please sign in to comment.