-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from Public-Health-Scotland/get_latest_resource
Add a function to return the latest resource from a dataset
- Loading branch information
Showing
8 changed files
with
253 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#' Get the latest resource from a data set
#'
#' Returns the latest resource available in a dataset.
#'
#' There are some datasets on the open data platform that
#' keep historic resources instead of updating existing ones.
#' For these it is useful to be able to retrieve the latest
#' resource. As of 1.8.2024 these data sets include:
#' * gp-practice-populations
#' * gp-practice-contact-details-and-list-sizes
#' * nhsscotland-payments-to-general-practice
#' * dental-practices-and-patient-registrations
#' * general-practitioner-contact-details
#' * prescribed-dispensed
#' * dispenser-location-contact-details
#' * community-pharmacy-contractor-activity
#'
#' An error is raised if `dataset_name` is not a single string naming
#' one of the data sets listed above.
#'
#' @inheritParams get_dataset
#' @inheritParams get_resource
#'
#' @return a [tibble][tibble::tibble-package] with the data
#' @export
#'
#' @examples
#' dataset_name <- "gp-practice-contact-details-and-list-sizes"
#'
#' data <- get_latest_resource(dataset_name)
#'
#' filters <- list("Postcode" = "DD11 1ES")
#' wanted_cols <- c("PracticeCode", "Postcode", "Dispensing")
#'
#' filtered_data <- get_latest_resource(
#'   dataset_name = dataset_name,
#'   row_filters = filters,
#'   col_select = wanted_cols
#' )
#'
get_latest_resource <- function(dataset_name,
                                rows = NULL,
                                row_filters = NULL,
                                col_select = NULL,
                                include_context = TRUE) {
  applicable_datasets <- c(
    "community-pharmacy-contractor-activity",
    "dental-practices-and-patient-registrations",
    "dispenser-location-contact-details",
    "general-practitioner-contact-details",
    "gp-practice-contact-details-and-list-sizes",
    "gp-practice-populations",
    "nhsscotland-payments-to-general-practice",
    "prescribed-dispensed"
  )

  # `if ()` needs a length-one condition. Checking the shape of
  # `dataset_name` first means NULL or a vector of names reaches the
  # formatted error below instead of a cryptic base R condition error.
  is_single_name <- is.character(dataset_name) && length(dataset_name) == 1L

  # check if data set is within applicable datasets
  # throw error if not
  if (!is_single_name || !dataset_name %in% applicable_datasets) {
    cli::cli_abort(
      c(
        "The dataset name supplied {.val {dataset_name}} is not within the applicable datasets.
        These are: {.val {applicable_datasets}}",
        "x" = "Please see {.fun get_latest_resource} documentation.",
        "i" = "You can find dataset names in the URL
        of a dataset's page on {.url www.opendata.nhs.scot}."
      ),
      call = rlang::caller_env()
    )
  }

  # look up the id of the latest resource and fetch it, passing the
  # row/column options straight through to get_resource()
  get_resource(
    res_id = get_latest_resource_id(dataset_name),
    rows = rows,
    row_filters = row_filters,
    col_select = col_select,
    include_context = include_context
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#' Get the id of the latest resource in a dataset
#'
#' To be confident that the resource returned is the one intended,
#' two conditions have to be met. It has to appear at the top of
#' the resource list as shown on the open data platform (the order
#' resources are returned via the api is the same as they appear on
#' the open data platform). It also has to have the most recent
#' date created.
#'
#' There are only some datasets that this functionality
#' is relevant to, these are the datasets that keep historic
#' resources instead of overwriting them.
#'
#' An error is raised when the dataset has no resources, when a
#' creation date is missing or cannot be parsed, or when the first
#' resource is not the most recently created one.
#'
#' @inheritParams get_dataset
#'
#' @return a string with the resource id
get_latest_resource_id <- function(dataset_name) {
  # send the api request
  query <- list("id" = dataset_name)
  content <- phs_GET("package_show", query)

  resources <- content$result$resources

  # nothing to choose from: fail with the package's own error rather
  # than letting a zero-length condition reach `if ()` further down
  if (length(resources) == 0L) {
    cli::cli_abort("The most recent id could not be identified")
  }

  # Extract one field per resource, keeping the result aligned with the
  # resource list: a missing (NULL) field becomes NA instead of being
  # dropped, which is what growing a vector with append() would do.
  field_or_na <- function(res, field) {
    value <- res[[field]]
    if (is.null(value)) NA_character_ else as.character(value)
  }

  ids <- vapply(
    resources, field_or_na, character(1),
    field = "id", USE.NAMES = FALSE
  )
  created <- vapply(
    resources, field_or_na, character(1),
    field = "created", USE.NAMES = FALSE
  )

  # strptime() returns POSIXlt; convert to POSIXct for plain vector
  # comparison. Unparseable or missing timestamps become NA.
  created_dates <- as.POSIXct(
    strptime(created, format = "%FT%X", tz = "UTC")
  )

  # The first resource is the one shown at the top of the open data
  # platform. Only return it if it also has the most recent creation
  # date. Any NA makes the comparison NA, which isTRUE() treats as
  # "not confirmed" so we fall through to the error.
  top_is_latest <- created_dates[[1]] == max(created_dates)

  if (isTRUE(top_is_latest) && !is.na(ids[[1]])) {
    return(ids[[1]])
  }

  cli::cli_abort("The most recent id could not be identified")
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
test_that("returns data for a dataset that is listed", {
  # This test downloads a live resource from the open data platform, so
  # it can only run with network access and should never run on CRAN.
  skip_on_cran()
  skip_if_offline(host = "www.opendata.nhs.scot")

  expect_no_error(
    latest <- get_latest_resource("gp-practice-populations")
  )
  # get_latest_resource() is documented to return a tibble
  expect_s3_class(latest, "tbl_df")
})

test_that("returns error for a dataset that is not listed", {
  # The dataset name is rejected before any request is sent, so no
  # network guard is needed. Checking the class ensures the failure is
  # the package's own cli/rlang abort and not an unrelated error.
  expect_error(
    get_latest_resource("hospital-codes"),
    class = "rlang_error"
  )
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
test_that("returns data for a dataset that is listed", {
  # Hits the live open data platform: skip where there is no network
  # access and on CRAN.
  skip_on_cran()
  skip_if_offline(host = "www.opendata.nhs.scot")

  expect_no_error(
    latest <- get_latest_resource("gp-practice-populations")
  )
  # the documented return value is a tibble
  expect_s3_class(latest, "tbl_df")
})

test_that("returns error for a dataset that is not listed", {
  # Validation of the dataset name happens before any request is made.
  # Asserting the condition class guards against passing on an
  # unrelated error.
  expect_error(
    get_latest_resource("hospital-codes"),
    class = "rlang_error"
  )
})