Skip to content

Commit

Permalink
Merge pull request #36 from Public-Health-Scotland/get_latest_resource
Browse files Browse the repository at this point in the history
Add a function to return the latest resource from a dataset
  • Loading branch information
Moohan authored Aug 1, 2024
2 parents 8f3f560 + a82b253 commit 22f73ab
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Imports:
magrittr (>= 1.0.0),
purrr,
readr (>= 1.0.0),
rlang (>= 1.0.0),
stringdist,
tibble (>= 3.0.0),
xml2
Expand All @@ -31,4 +32,4 @@ Config/testthat/parallel: true
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export("%>%")
export(get_dataset)
export(get_latest_resource)
export(get_resource)
export(get_resource_sql)
importFrom(magrittr,"%>%")
82 changes: 82 additions & 0 deletions R/get_latest_resource.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#' Get the latest resource from a dataset
#'
#' Returns the latest resource available in a dataset.
#'
#' There are some datasets on the open data platform that
#' keep historic resources instead of updating existing ones.
#' For these it is useful to be able to retrieve the latest
#' resource. As of 1 August 2024 these datasets include:
#'   * gp-practice-populations
#'   * gp-practice-contact-details-and-list-sizes
#'   * nhsscotland-payments-to-general-practice
#'   * dental-practices-and-patient-registrations
#'   * general-practitioner-contact-details
#'   * prescribed-dispensed
#'   * dispenser-location-contact-details
#'   * community-pharmacy-contractor-activity
#'
#' Any other value of `dataset_name` (including `NULL` or a vector with
#' more than one element) is rejected with an error.
#'
#' @inheritParams get_dataset
#' @inheritParams get_resource
#'
#' @return a [tibble][tibble::tibble-package] with the data
#' @export
#'
#' @examples
#' dataset_name <- "gp-practice-contact-details-and-list-sizes"
#'
#' data <- get_latest_resource(dataset_name)
#'
#' filters <- list("Postcode" = "DD11 1ES")
#' wanted_cols <- c("PracticeCode", "Postcode", "Dispensing")
#'
#' filtered_data <- get_latest_resource(
#'   dataset_name = dataset_name,
#'   row_filters = filters,
#'   col_select = wanted_cols
#' )
#'
get_latest_resource <- function(dataset_name,
                                rows = NULL,
                                row_filters = NULL,
                                col_select = NULL,
                                include_context = TRUE) {
  applicable_datasets <- c(
    "community-pharmacy-contractor-activity",
    "dental-practices-and-patient-registrations",
    "dispenser-location-contact-details",
    "general-practitioner-contact-details",
    "gp-practice-contact-details-and-list-sizes",
    "gp-practice-populations",
    "nhsscotland-payments-to-general-practice",
    "prescribed-dispensed"
  )

  # Check the dataset is one of the applicable datasets, throw an error
  # if not. `isTRUE()` also routes `NULL` and length > 1 input to the
  # informative error below rather than an opaque failure inside `if`.
  if (!isTRUE(dataset_name %in% applicable_datasets)) {
    cli::cli_abort(
      c(
        paste0(
          "The dataset name supplied {.val {dataset_name}} is not within ",
          "the applicable datasets.\n",
          "These are: {.val {applicable_datasets}}"
        ),
        "x" = "Please see {.fun get_latest_resource} documentation.",
        "i" = "You can find dataset names in the URL
of a dataset's page on {.url www.opendata.nhs.scot}."
      ),
      call = rlang::caller_env()
    )
  }

  # get the latest resource id
  id <- get_latest_resource_id(dataset_name)

  # Download that resource, forwarding the row / column options unchanged
  get_resource(
    res_id = id,
    rows = rows,
    row_filters = row_filters,
    col_select = col_select,
    include_context = include_context
  )
}
55 changes: 55 additions & 0 deletions R/get_latest_resource_id.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#' Get the id of the latest resource in a dataset
#'
#' To be confident that the resource returned is the one intended,
#' two conditions have to be met. It has to appear at the top
#' of the resource list as shown on the open data platform
#' (the order resources are returned via the API is the same
#' as they appear on the open data platform). It also
#' has to have the most recent date created.
#'
#' There are only some datasets that this functionality
#' is relevant to: the datasets that keep historic
#' resources instead of overwriting them.
#'
#' @inheritParams get_dataset
#'
#' @return a string with the resource id. An error is raised if the
#'   dataset has no resources, or if the first resource listed cannot be
#'   confirmed as the most recently created one.
get_latest_resource_id <- function(dataset_name) {
  # send the api request
  query <- list("id" = dataset_name)
  content <- phs_GET("package_show", query)

  resources <- content$result$resources
  if (length(resources) == 0) {
    cli::cli_abort("The most recent id could not be identified")
  }

  # Retrieve the id and created date of every resource from the returned
  # content, keeping the order in which the API listed them
  ids <- vapply(resources, function(res) res[["id"]], character(1))
  created <- vapply(resources, function(res) res[["created"]], character(1))
  created_date <- as.POSIXct(
    created,
    format = "%Y-%m-%dT%H:%M:%S",
    tz = "UTC"
  )

  # If the resource at the top (as it appears on the open data platform)
  # also has the most recent date created, return it. Otherwise, error.
  # `isTRUE()` turns an unparseable (NA) date into the same error instead
  # of a failure inside `if`.
  if (isTRUE(created_date[[1]] == max(created_date))) {
    return(ids[[1]])
  }

  cli::cli_abort("The most recent id could not be identified")
}
70 changes: 70 additions & 0 deletions man/get_latest_resource.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/get_latest_resource_id.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions tests/testthat/test-get_latest_resource.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Tests for get_latest_resource(). The first test queries the live
# open data API.
test_that("returns data for a dataset that is listed", {
  expect_no_error(get_latest_resource("gp-practice-populations"))
})

test_that("returns error for a dataset that is not listed", {
  # Match on the message so that an unrelated failure cannot make this
  # test pass by accident.
  expect_error(
    get_latest_resource("hospital-codes"),
    regexp = "not within"
  )
})
7 changes: 7 additions & 0 deletions tests/testthat/test-get_latest_resource_id.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Tests for get_latest_resource_id(). The first test queries the live
# open data API.
test_that("returns a single resource id for a dataset that is listed", {
  id <- get_latest_resource_id("gp-practice-populations")

  expect_type(id, "character")
  expect_length(id, 1)
})

# The check against the list of applicable datasets is made by
# get_latest_resource(), so the rejection is exercised through it.
test_that("returns error for a dataset that is not listed", {
  expect_error(get_latest_resource("hospital-codes"))
})

0 comments on commit 22f73ab

Please sign in to comment.