From afb5d6438480a514341796d55128f7a79a93ac35 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 1 May 2024 16:46:27 +0100 Subject: [PATCH 01/93] add file get_latest_resource.R --- R/get_latest_resource.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 R/get_latest_resource.R diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R new file mode 100644 index 0000000..4d3ff80 --- /dev/null +++ b/R/get_latest_resource.R @@ -0,0 +1,21 @@ +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# RStudio Workbench is strictly for use by Public Health Scotland staff and +# authorised users only, and is governed by the Acceptable Usage Policy https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_acceptable_use_policy.md. +# +# This is a shared resource and is hosted on a pay-as-you-go cloud computing +# platform. Your usage will incur direct financial cost to Public Health +# Scotland. As such, please ensure +# +# 1. that this session is appropriately sized with the minimum number of CPUs +# and memory required for the size and scale of your analysis; +# 2. the code you write in this script is optimal and only writes out the +# data required, nothing more. +# 3. you close this session when not in use; idle sessions still cost PHS +# money! +# +# For further guidance, please see https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_best_practice_with_r.md. +# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + From 8ac21bd90824be12251521b0c1187c0b442797bf Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 2 May 2024 16:53:43 +0100 Subject: [PATCH 02/93] create initial basic get_latest_resource.R --- R/get_latest_resource.R | 69 +++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 4d3ff80..6cd08c4 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -1,21 +1,58 @@ -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# RStudio Workbench is strictly for use by Public Health Scotland staff and -# authorised users only, and is governed by the Acceptable Usage Policy https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_acceptable_use_policy.md. -# -# This is a shared resource and is hosted on a pay-as-you-go cloud computing -# platform. Your usage will incur direct financial cost to Public Health -# Scotland. As such, please ensure -# -# 1. that this session is appropriately sized with the minimum number of CPUs -# and memory required for the size and scale of your analysis; -# 2. the code you write in this script is optimal and only writes out the -# data required, nothing more. -# 3. you close this session when not in use; idle sessions still cost PHS -# money! +get_latest_resource <- function(dataset_name, max_resources = NULL, rows = NULL){ + # throw error if name type/format is invalid + check_dataset_name(dataset_name) + + # define query and try API call + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + # if content contains a 'Not Found Error' + # throw error with suggested dataset name + if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) + } + + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) + res_index <- 1:length(all_ids) + + id <- c() + created_date <- c() + modified_date <- c() + + for(i in content$result$resources){ + id <- append(id, i$id) + created_date <- append(created_date, i$created) + modified_date <- append(modified_date, i$last_modified) + } + + + all_id_data <- list(id = id, + created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), + modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) + + print(all_id_data) + + #all_id_data + + #df <- data.frame(all_id_data) %>% + # subset(created_date == max(created_date)) # -# For further guidance, please see https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_best_practice_with_r.md. + #df$id # -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +} + + + + From 7b443cbfa8bcc37c70cbe90439ebf8bdb04614a5 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 2 May 2024 16:54:04 +0100 Subject: [PATCH 03/93] create initial basic get_latest_resource.R --- R/get_latest_resource.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 6cd08c4..002168b 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -39,15 +39,12 @@ get_latest_resource <- function(dataset_name, max_resources = NULL, rows = NULL) created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) - print(all_id_data) - #all_id_data + df <- data.frame(all_id_data) %>% + subset(created_date == max(created_date)) + + df$id - #df <- data.frame(all_id_data) %>% - # subset(created_date == max(created_date)) -# - #df$id -# } From fe42f0f0e9a60a0a9ff931262c1c9010e4df6113 Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 3 May 2024 13:35:00 +0100 Subject: [PATCH 04/93] add applicable_datasets list and throw error if dataset is not in list --- R/get_latest_resource.R | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 002168b..da20e97 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -1,4 +1,9 @@ -get_latest_resource <- function(dataset_name, max_resources = NULL, rows = NULL){ +get_latest_resource <- function(dataset_name, rows = NULL){ + applicable_datasets <- c("gp-practice-populations", "gp-practice-contact-details-and-list-sizes", + "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", + "general-practitioner-contact-details", "prescribed-dispensed", + "prescriptions-in-the-community", "community-pharmacy-contractor-activity") + # throw error if name type/format is invalid check_dataset_name(dataset_name) @@ -9,21 +14,36 @@ get_latest_resource <- function(dataset_name, max_resources = NULL, rows = NULL) silent = TRUE ) + + #check if data set is within applicable datasets + #throw error if not + if(!dataset_name %in% applicable_datasets){ + cli::cli_abort(c( + "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets", + "x" = "Please see documentation.", + "i" = "You can find dataset names in the URL + of a dataset's page on {.url www.opendata.nhs.scot}." + )) + } + # if content contains a 'Not Found Error' # throw error with suggested dataset name if (grepl("Not Found Error", content[1])) { suggest_dataset_name(dataset_name) } + #sned the api request query <- list("id" = dataset_name) content <- try( phs_GET("package_show", query), silent = TRUE ) + #retrieve the resource id's from returned contect all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) - res_index <- 1:length(all_ids) + + #add the id, created date and last_modified to a dataframe id <- c() created_date <- c() modified_date <- c() @@ -33,17 +53,15 @@ get_latest_resource <- function(dataset_name, max_resources = NULL, rows = NULL) created_date <- append(created_date, i$created) modified_date <- append(modified_date, i$last_modified) } - - all_id_data <- list(id = id, created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) - + #filter for the id with the most recent created date df <- data.frame(all_id_data) %>% subset(created_date == max(created_date)) - df$id + return(df$id) } From 3fab708ddf73fd3c6cee1e9e6b0135b620e569a7 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 6 May 2024 13:45:19 +0100 Subject: [PATCH 05/93] change name of file and function to include id, add a test file --- R/get_latest_resource_id.R | 73 ++++++++++++++++++++ R/test-get_latest_resource_id.R | 21 ++++++ tests/testthat/test-get_latest_resource_id.R | 21 ++++++ 3 files changed, 115 insertions(+) create mode 100644 R/get_latest_resource_id.R create mode 100644 R/test-get_latest_resource_id.R create mode 100644 tests/testthat/test-get_latest_resource_id.R diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R new file mode 100644 index 0000000..6a50705 --- /dev/null +++ b/R/get_latest_resource_id.R @@ -0,0 +1,73 @@ +get_latest_resource_id <- function(dataset_name, rows = NULL){ + applicable_datasets <- c("gp-practice-populations", "gp-practice-contact-details-and-list-sizes", + "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", + "general-practitioner-contact-details", "prescribed-dispensed", + "prescriptions-in-the-community", "community-pharmacy-contractor-activity") + + # throw error if name type/format is invalid + check_dataset_name(dataset_name) + + # define query and try API call + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + + #check if data set is within applicable datasets + #throw error if not + if(!dataset_name %in% applicable_datasets){ + cli::cli_abort(c( + "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets", + "x" = "Please see documentation.", + "i" = "You can find dataset names in the URL + of a dataset's page on {.url www.opendata.nhs.scot}." + )) + } + + # if content contains a 'Not Found Error' + # throw error with suggested dataset name + if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) + } + + #sned the api request + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + #retrieve the resource id's from returned contect + all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) + + + #add the id, created date and last_modified to a dataframe + id <- c() + created_date <- c() + modified_date <- c() + + for(i in content$result$resources){ + id <- append(id, i$id) + created_date <- append(created_date, i$created) + modified_date <- append(modified_date, i$last_modified) + } + all_id_data <- list(id = id, + created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), + modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) + + #filter for the id with the most recent created date + df <- data.frame(all_id_data) %>% + subset(created_date == max(created_date)) + + return(df$id) + +} + + + + + + + diff --git a/R/test-get_latest_resource_id.R b/R/test-get_latest_resource_id.R new file mode 100644 index 0000000..4d3ff80 --- /dev/null +++ b/R/test-get_latest_resource_id.R @@ -0,0 +1,21 @@ +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# RStudio Workbench is strictly for use by Public Health Scotland staff and +# authorised users only, and is governed by the Acceptable Usage Policy https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_acceptable_use_policy.md. +# +# This is a shared resource and is hosted on a pay-as-you-go cloud computing +# platform. Your usage will incur direct financial cost to Public Health +# Scotland. As such, please ensure +# +# 1. that this session is appropriately sized with the minimum number of CPUs +# and memory required for the size and scale of your analysis; +# 2. the code you write in this script is optimal and only writes out the +# data required, nothing more. +# 3. you close this session when not in use; idle sessions still cost PHS +# money! +# +# For further guidance, please see https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_best_practice_with_r.md. +# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R new file mode 100644 index 0000000..4d3ff80 --- /dev/null +++ b/tests/testthat/test-get_latest_resource_id.R @@ -0,0 +1,21 @@ +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# RStudio Workbench is strictly for use by Public Health Scotland staff and +# authorised users only, and is governed by the Acceptable Usage Policy https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_acceptable_use_policy.md. +# +# This is a shared resource and is hosted on a pay-as-you-go cloud computing +# platform. Your usage will incur direct financial cost to Public Health +# Scotland. As such, please ensure +# +# 1. that this session is appropriately sized with the minimum number of CPUs +# and memory required for the size and scale of your analysis; +# 2. the code you write in this script is optimal and only writes out the +# data required, nothing more. +# 3. you close this session when not in use; idle sessions still cost PHS +# money! +# +# For further guidance, please see https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_best_practice_with_r.md. +# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + + From 701bb33ddbc281123339d44ca8455325f0cd8ff4 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 6 May 2024 13:55:13 +0100 Subject: [PATCH 06/93] remove test file for the moment --- tests/testthat/test-get_latest_resource_id.R | 21 -------------------- 1 file changed, 21 deletions(-) delete mode 100644 tests/testthat/test-get_latest_resource_id.R diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R deleted file mode 100644 index 4d3ff80..0000000 --- a/tests/testthat/test-get_latest_resource_id.R +++ /dev/null @@ -1,21 +0,0 @@ -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# RStudio Workbench is strictly for use by Public Health Scotland staff and -# authorised users only, and is governed by the Acceptable Usage Policy https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_acceptable_use_policy.md. -# -# This is a shared resource and is hosted on a pay-as-you-go cloud computing -# platform. Your usage will incur direct financial cost to Public Health -# Scotland. As such, please ensure -# -# 1. that this session is appropriately sized with the minimum number of CPUs -# and memory required for the size and scale of your analysis; -# 2. the code you write in this script is optimal and only writes out the -# data required, nothing more. -# 3. you close this session when not in use; idle sessions still cost PHS -# money! -# -# For further guidance, please see https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_best_practice_with_r.md. -# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - From e490d18f3af400d3e10939478fa5b18da5179296 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 6 May 2024 14:44:20 +0100 Subject: [PATCH 07/93] remove rows argument from get latest resource id function --- R/get_latest_resource_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 6a50705..cb1c99c 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -1,4 +1,4 @@ -get_latest_resource_id <- function(dataset_name, rows = NULL){ +get_latest_resource_id <- function(dataset_name){ applicable_datasets <- c("gp-practice-populations", "gp-practice-contact-details-and-list-sizes", "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", "general-practitioner-contact-details", "prescribed-dispensed", From c9923ca0903ccb466304deaad735d914aee87c25 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 23 Apr 2024 11:25:54 +0100 Subject: [PATCH 08/93] initial explore --- R/issue10.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 R/issue10.R diff --git a/R/issue10.R b/R/issue10.R new file mode 100644 index 0000000..bf0b457 --- /dev/null +++ b/R/issue10.R @@ -0,0 +1,23 @@ + +dataset_name <- "community-pharmacy-contractor-activity" + +# throw error if name type/format is invalid +check_dataset_name(dataset_name) + +# define query and try API call +query <- list("id" = dataset_name) +content <- try( + phs_GET("package_show", query), + silent = TRUE +) + +# if content contains a 'Not Found Error' +# throw error with suggested dataset name +if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) +} + +# define list of resource IDs to get +all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) +all_names <- purrr::map_chr(content$result$resources, ~ .x$name) +return_value <- tibble("id" = all_ids, "names" = all_names) From 02d3fb4fca8fb18afae84b64d89e6c22742ee8c2 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 6 May 2024 15:22:43 +0100 Subject: [PATCH 09/93] remov file --- R/issue10.R | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 R/issue10.R diff --git a/R/issue10.R b/R/issue10.R deleted file mode 100644 index bf0b457..0000000 --- a/R/issue10.R +++ /dev/null @@ -1,23 +0,0 @@ - -dataset_name <- "community-pharmacy-contractor-activity" - -# throw error if name type/format is invalid -check_dataset_name(dataset_name) - -# define query and try API call -query <- list("id" = dataset_name) -content <- try( - phs_GET("package_show", query), - silent = TRUE -) - -# if content contains a 'Not Found Error' -# throw error with suggested dataset name -if (grepl("Not Found Error", content[1])) { - suggest_dataset_name(dataset_name) -} - -# define list of resource IDs to get -all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) -all_names <- purrr::map_chr(content$result$resources, ~ .x$name) -return_value <- tibble("id" = all_ids, "names" = all_names) From 78dc3bf1603aeed0ca2aa8f99cd412275f2ee4f9 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 6 May 2024 15:24:38 +0100 Subject: [PATCH 10/93] create show_resources --- R/get_resources.R | 24 ++++++++++++++++++++++++ R/show_resources.R | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 R/get_resources.R create mode 100644 R/show_resources.R diff --git a/R/get_resources.R b/R/get_resources.R new file mode 100644 index 0000000..ec6fcc5 --- /dev/null +++ b/R/get_resources.R @@ -0,0 +1,24 @@ +show_resources <- function(dataset_name){ + # throw error if name type/format is invalid + check_dataset_name(dataset_name) + + # define query and try API call + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + # if content contains a 'Not Found Error' + # throw error with suggested dataset name + if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) + } + + # define list of resource IDs and names within dataset + all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) + all_names <- purrr::map_chr(content$result$resources, ~ .x$name) + return_value <- list("id" = all_ids, "names" = all_names) + + return(return_value) + } diff --git a/R/show_resources.R b/R/show_resources.R new file mode 100644 index 0000000..ec6fcc5 --- /dev/null +++ b/R/show_resources.R @@ -0,0 +1,24 @@ +show_resources <- function(dataset_name){ + # throw error if name type/format is invalid + check_dataset_name(dataset_name) + + # define query and try API call + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + # if content contains a 'Not Found Error' + # throw error with suggested dataset name + if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) + } + + # define list of resource IDs and names within dataset + all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) + all_names <- purrr::map_chr(content$result$resources, ~ .x$name) + return_value <- list("id" = all_ids, "names" = all_names) + + return(return_value) + } From 9ed793265ed5b8127051869c9268dab458a280a7 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 6 May 2024 15:31:01 +0100 Subject: [PATCH 11/93] create simple show datasets function --- R/show_datasets.R | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 R/show_datasets.R diff --git a/R/show_datasets.R b/R/show_datasets.R new file mode 100644 index 0000000..4b70559 --- /dev/null +++ b/R/show_datasets.R @@ -0,0 +1,5 @@ +show_datasets <- function(){ + data_sets <- phs_GET("package_list", "")$result + + return(data_sets) +} From 043abd89fea077f921c6342edff802b81c4533e7 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Mon, 6 May 2024 14:35:56 +0000 Subject: [PATCH 12/93] Style code (GHA) --- R/get_resources.R | 4 ++-- R/show_datasets.R | 2 +- R/show_resources.R | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_resources.R b/R/get_resources.R index ec6fcc5..456032a 100644 --- a/R/get_resources.R +++ b/R/get_resources.R @@ -1,4 +1,4 @@ -show_resources <- function(dataset_name){ +show_resources <- function(dataset_name) { # throw error if name type/format is invalid check_dataset_name(dataset_name) @@ -21,4 +21,4 @@ show_resources <- function(dataset_name){ return_value <- list("id" = all_ids, "names" = all_names) return(return_value) - } +} diff --git a/R/show_datasets.R b/R/show_datasets.R index 4b70559..edda707 100644 --- a/R/show_datasets.R +++ b/R/show_datasets.R @@ -1,4 +1,4 @@ -show_datasets <- function(){ +show_datasets <- function() { data_sets <- phs_GET("package_list", "")$result return(data_sets) diff --git a/R/show_resources.R b/R/show_resources.R index ec6fcc5..456032a 100644 --- a/R/show_resources.R +++ b/R/show_resources.R @@ -1,4 +1,4 @@ -show_resources <- function(dataset_name){ +show_resources <- function(dataset_name) { # throw error if name type/format is invalid check_dataset_name(dataset_name) @@ -21,4 +21,4 @@ show_resources <- function(dataset_name){ return_value <- list("id" = all_ids, "names" = all_names) return(return_value) - } +} From 6f41b83ff1b1cf2b399ccf2de53cffa06fd8c7f4 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 7 May 2024 12:35:43 +0100 Subject: [PATCH 13/93] remove get_resources.R file --- R/get_resources.R | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 R/get_resources.R diff --git a/R/get_resources.R b/R/get_resources.R deleted file mode 100644 index 456032a..0000000 --- a/R/get_resources.R +++ /dev/null @@ -1,24 +0,0 @@ -show_resources <- function(dataset_name) { - # throw error if name type/format is invalid - check_dataset_name(dataset_name) - - # define query and try API call - query <- list("id" = dataset_name) - content <- try( - phs_GET("package_show", query), - silent = TRUE - ) - - # if content contains a 'Not Found Error' - # throw error with suggested dataset name - if (grepl("Not Found Error", content[1])) { - suggest_dataset_name(dataset_name) - } - - # define list of resource IDs and names within dataset - all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) - all_names <- purrr::map_chr(content$result$resources, ~ .x$name) - return_value <- list("id" = all_ids, "names" = all_names) - - return(return_value) -} From 7ec3e049f9e1c2becf12bd42ceff5d0388946e8d Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 8 May 2024 16:33:48 +0100 Subject: [PATCH 14/93] delete get_latest_resource.R --- R/get_latest_resource.R | 73 ----------------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 R/get_latest_resource.R diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R deleted file mode 100644 index da20e97..0000000 --- a/R/get_latest_resource.R +++ /dev/null @@ -1,73 +0,0 @@ -get_latest_resource <- function(dataset_name, rows = NULL){ - applicable_datasets <- c("gp-practice-populations", "gp-practice-contact-details-and-list-sizes", - "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", - "general-practitioner-contact-details", "prescribed-dispensed", - "prescriptions-in-the-community", "community-pharmacy-contractor-activity") - - # throw error if name type/format is invalid - check_dataset_name(dataset_name) - - # define query and try API call - query <- list("id" = dataset_name) - content <- try( - phs_GET("package_show", query), - silent = TRUE - ) - - - #check if data set is within applicable datasets - #throw error if not - if(!dataset_name %in% applicable_datasets){ - cli::cli_abort(c( - "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets", - "x" = "Please see documentation.", - "i" = "You can find dataset names in the URL - of a dataset's page on {.url www.opendata.nhs.scot}." - )) - } - - # if content contains a 'Not Found Error' - # throw error with suggested dataset name - if (grepl("Not Found Error", content[1])) { - suggest_dataset_name(dataset_name) - } - - #sned the api request - query <- list("id" = dataset_name) - content <- try( - phs_GET("package_show", query), - silent = TRUE - ) - - #retrieve the resource id's from returned contect - all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) - - - #add the id, created date and last_modified to a dataframe - id <- c() - created_date <- c() - modified_date <- c() - - for(i in content$result$resources){ - id <- append(id, i$id) - created_date <- append(created_date, i$created) - modified_date <- append(modified_date, i$last_modified) - } - all_id_data <- list(id = id, - created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), - modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) - - #filter for the id with the most recent created date - df <- data.frame(all_id_data) %>% - subset(created_date == max(created_date)) - - return(df$id) - -} - - - - - - - From 9dc2d5fec301fd9dfdac5fd91ff4c9f58dfa2492 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 9 May 2024 16:27:37 +0100 Subject: [PATCH 15/93] change return type of show_resources to be tibble and add created and last modified variables --- R/show_resources.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/show_resources.R b/R/show_resources.R index 456032a..e50774e 100644 --- a/R/show_resources.R +++ b/R/show_resources.R @@ -18,7 +18,10 @@ show_resources <- function(dataset_name) { # define list of resource IDs and names within dataset all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) all_names <- purrr::map_chr(content$result$resources, ~ .x$name) - return_value <- list("id" = all_ids, "names" = all_names) + all_date_created <- purrr::map_chr(content$result$resources, ~ .x$created) + all_date_modified <- purrr::map_chr(content$result$resources, ~ .x$last_modified) + return_value <- tibble::tibble("res_id" = all_ids, "name" = all_names, + "created" = all_date_created, "last_modified" = all_date_modified) return(return_value) } From 57c6345456a9462d98a99409801c3497b1069395 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Thu, 9 May 2024 15:30:32 +0000 Subject: [PATCH 16/93] Style code (GHA) --- R/show_resources.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/show_resources.R b/R/show_resources.R index e50774e..2c02164 100644 --- a/R/show_resources.R +++ b/R/show_resources.R @@ -20,8 +20,10 @@ show_resources <- function(dataset_name) { all_names <- purrr::map_chr(content$result$resources, ~ .x$name) all_date_created <- purrr::map_chr(content$result$resources, ~ .x$created) all_date_modified <- purrr::map_chr(content$result$resources, ~ .x$last_modified) - return_value <- tibble::tibble("res_id" = all_ids, "name" = all_names, - "created" = all_date_created, "last_modified" = all_date_modified) + return_value <- tibble::tibble( + "res_id" = all_ids, "name" = all_names, + "created" = all_date_created, "last_modified" = all_date_modified + ) return(return_value) } From b94c68c3caf31fd3fb80571714b9916e5b134406 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 13 May 2024 15:18:23 +0100 Subject: [PATCH 17/93] add roxygen comment to R/show_datasets.R --- R/show_datasets.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/R/show_datasets.R b/R/show_datasets.R index edda707..6c593ff 100644 --- a/R/show_datasets.R +++ b/R/show_datasets.R @@ -1,5 +1,16 @@ +#' Show all available datasets +#' +#' `show_datasets()`Shows all datasets available form phs open data platform. +#' +#' @return A character vector. +#' @export +#' +#' @examples +#' show_datasets() +#' show_datasets <- function() { - data_sets <- phs_GET("package_list", "")$result + data_sets <- phs_GET("package_list", "")$result %>% + unlist() return(data_sets) } From 650ab7c94396518e9626217658fa2f5f0aeb08ca Mon Sep 17 00:00:00 2001 From: ross-hull Date: Mon, 13 May 2024 14:53:36 +0000 Subject: [PATCH 18/93] Update documentation --- NAMESPACE | 1 + man/show_datasets.Rd | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 man/show_datasets.Rd diff --git a/NAMESPACE b/NAMESPACE index 369705a..9b2bc4a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,4 +3,5 @@ export(get_dataset) export(get_resource) export(get_resource_sql) +export(show_datasets) importFrom(magrittr,"%>%") diff --git a/man/show_datasets.Rd b/man/show_datasets.Rd new file mode 100644 index 0000000..15e0a1e --- /dev/null +++ b/man/show_datasets.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/show_datasets.R +\name{show_datasets} +\alias{show_datasets} +\title{Show all available datasets} +\usage{ +show_datasets() +} +\value{ +A character vector. +} +\description{ +\code{show_datasets()}Shows all datasets available form phs open data platform. +} +\examples{ +show_datasets() + +} From d76c1c5916c58713606f71c1e52deb3d79a864f4 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 13 May 2024 16:07:03 +0100 Subject: [PATCH 19/93] add roxygen comment to show_resources.R --- R/show_resources.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/show_resources.R b/R/show_resources.R index 2c02164..6d0886d 100644 --- a/R/show_resources.R +++ b/R/show_resources.R @@ -1,3 +1,12 @@ +#' Show all available resources for a dataset +#' +#' @param dataset_name +#' +#' @return a [tibble][tibble::tibble-package] with the data +#' @export +#' +#' @examples +#' show_resources("weekly-accident-and-emergency-activity-and-waiting-times") show_resources <- function(dataset_name) { # throw error if name type/format is invalid check_dataset_name(dataset_name) From 96e5eb73ac4c22042cabc8f3d8ada781d6811d32 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Mon, 13 May 2024 15:09:45 +0000 Subject: [PATCH 20/93] Update documentation --- NAMESPACE | 1 + man/show_resources.Rd | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 man/show_resources.Rd diff --git a/NAMESPACE b/NAMESPACE index 9b2bc4a..125a232 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,4 +4,5 @@ export(get_dataset) export(get_resource) export(get_resource_sql) export(show_datasets) +export(show_resources) importFrom(magrittr,"%>%") diff --git a/man/show_resources.Rd b/man/show_resources.Rd new file mode 100644 index 0000000..0151074 --- /dev/null +++ b/man/show_resources.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/show_resources.R +\name{show_resources} +\alias{show_resources} +\title{Show all available resources for a dataset} +\usage{ +show_resources(dataset_name) +} +\arguments{ +\item{dataset_name}{} +} +\value{ +a \link[tibble:tibble-package]{tibble} with the data +} +\description{ +Show all available resources for a dataset +} +\examples{ +show_resources("weekly-accident-and-emergency-activity-and-waiting-times") +} From a0f153eb00b4175d030eacb2360b39895784e5e4 Mon Sep 17 00:00:00 2001 From: ross hull Date: Mon, 13 May 2024 16:10:16 +0100 Subject: [PATCH 21/93] add documentation --- NAMESPACE | 1 + R/show_resources.R | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 9b2bc4a..125a232 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,4 +4,5 @@ export(get_dataset) export(get_resource) export(get_resource_sql) export(show_datasets) +export(show_resources) importFrom(magrittr,"%>%") diff --git a/R/show_resources.R b/R/show_resources.R index 6d0886d..91f776a 100644 --- a/R/show_resources.R +++ b/R/show_resources.R @@ -1,5 +1,8 @@ #' Show all available resources for a dataset #' +#' show_resources() returns all of the resources associated +#' with a dataset +#' #' @param dataset_name #' #' @return a [tibble][tibble::tibble-package] with the data From afe2fbcdc620d12c231e2b46eb324d25490bb256 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Mon, 13 May 2024 15:13:51 +0000 Subject: [PATCH 22/93] Update documentation --- man/show_resources.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/show_resources.Rd b/man/show_resources.Rd index 0151074..583db9b 100644 --- a/man/show_resources.Rd +++ b/man/show_resources.Rd @@ -13,7 +13,8 @@ show_resources(dataset_name) a \link[tibble:tibble-package]{tibble} with the data } \description{ -Show all available resources for a dataset +show_resources() returns all of the resources associated +with a dataset } \examples{ show_resources("weekly-accident-and-emergency-activity-and-waiting-times") From 84c819311c13dcf2848b1f5e5fe094df5aa6ac05 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 14 May 2024 13:30:03 +0100 Subject: [PATCH 23/93] fix wording of description for show_datasets --- R/show_datasets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/show_datasets.R b/R/show_datasets.R index 6c593ff..7cde212 100644 --- a/R/show_datasets.R +++ b/R/show_datasets.R @@ -1,6 +1,6 @@ #' Show all available datasets #' -#' `show_datasets()`Shows all datasets available form phs open data platform. +#' `show_datasets()` shows all of the datasets hosted on the phs open data platform. #' #' @return A character vector. #' @export From 5d9a2ac076d958f6acdf16b6d753ff5a072908a9 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 14 May 2024 12:33:03 +0000 Subject: [PATCH 24/93] Update documentation --- man/show_datasets.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/show_datasets.Rd b/man/show_datasets.Rd index 15e0a1e..6648497 100644 --- a/man/show_datasets.Rd +++ b/man/show_datasets.Rd @@ -10,7 +10,7 @@ show_datasets() A character vector. } \description{ -\code{show_datasets()}Shows all datasets available form phs open data platform. +\code{show_datasets()} shows all of the datasets hosted on the phs open data platform. } \examples{ show_datasets() From 07f020621c25816813d7a642a41cb8ef42690deb Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 14 May 2024 16:47:01 +0100 Subject: [PATCH 25/93] Update R/show_datasets.R Co-authored-by: James McMahon --- R/show_datasets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/show_datasets.R b/R/show_datasets.R index 7cde212..5e81233 100644 --- a/R/show_datasets.R +++ b/R/show_datasets.R @@ -6,7 +6,7 @@ #' @export #' #' @examples -#' show_datasets() +#' head(show_datasets()) #' show_datasets <- function() { data_sets <- phs_GET("package_list", "")$result %>% From 5cf636103dadb07391d9fb48ba78764d5c7edc19 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 14 May 2024 16:47:48 +0100 Subject: [PATCH 26/93] Update R/show_resources.R Co-authored-by: James McMahon --- R/show_resources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/show_resources.R b/R/show_resources.R index 91f776a..5a84781 100644 --- a/R/show_resources.R +++ b/R/show_resources.R @@ -3,7 +3,7 @@ #' show_resources() returns all of the resources associated #' with a dataset #' -#' @param dataset_name +#' @inheritParams get_dataset #' #' @return a [tibble][tibble::tibble-package] with the data #' @export From 63d7b7c9cae0c61f41f1c5003e54ad046f1c538c Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 14 May 2024 15:48:59 +0000 Subject: [PATCH 27/93] Update documentation --- man/show_datasets.Rd | 2 +- man/show_resources.Rd | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/man/show_datasets.Rd b/man/show_datasets.Rd index 6648497..89705f2 100644 --- a/man/show_datasets.Rd +++ b/man/show_datasets.Rd @@ -13,6 +13,6 @@ A character vector. \code{show_datasets()} shows all of the datasets hosted on the phs open data platform. } \examples{ -show_datasets() +head(show_datasets()) } diff --git a/man/show_resources.Rd b/man/show_resources.Rd index 583db9b..5383dfe 100644 --- a/man/show_resources.Rd +++ b/man/show_resources.Rd @@ -7,7 +7,8 @@ show_resources(dataset_name) } \arguments{ -\item{dataset_name}{} +\item{dataset_name}{name of the dataset as found on +\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} } \value{ a \link[tibble:tibble-package]{tibble} with the data From e9d7e8136020c7526aef94a17a9e5a0463053383 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 14 May 2024 17:06:50 +0100 Subject: [PATCH 28/93] change name of show_datasets to list_datasets --- R/show_datasets.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/show_datasets.R b/R/show_datasets.R index 5e81233..f225d83 100644 --- a/R/show_datasets.R +++ b/R/show_datasets.R @@ -1,6 +1,6 @@ #' Show all available datasets #' -#' `show_datasets()` shows all of the datasets hosted on the phs open data platform. +#' `list_datasets()` shows all of the datasets hosted on the phs open data platform. #' #' @return A character vector. #' @export @@ -8,7 +8,7 @@ #' @examples #' head(show_datasets()) #' -show_datasets <- function() { +list_datasets <- function() { data_sets <- phs_GET("package_list", "")$result %>% unlist() From 3ca9a48e844554200a409fa45d44187f0a935590 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 14 May 2024 16:08:20 +0000 Subject: [PATCH 29/93] Update documentation --- NAMESPACE | 2 +- man/{show_datasets.Rd => list_datasets.Rd} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename man/{show_datasets.Rd => list_datasets.Rd} (66%) diff --git a/NAMESPACE b/NAMESPACE index 125a232..163072e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,6 @@ export(get_dataset) export(get_resource) export(get_resource_sql) -export(show_datasets) +export(list_datasets) export(show_resources) importFrom(magrittr,"%>%") diff --git a/man/show_datasets.Rd b/man/list_datasets.Rd similarity index 66% rename from man/show_datasets.Rd rename to man/list_datasets.Rd index 89705f2..4ea688c 100644 --- a/man/show_datasets.Rd +++ b/man/list_datasets.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/show_datasets.R -\name{show_datasets} -\alias{show_datasets} +\name{list_datasets} +\alias{list_datasets} \title{Show all available datasets} \usage{ -show_datasets() +list_datasets() } \value{ A character vector. } \description{ -\code{show_datasets()} shows all of the datasets hosted on the phs open data platform. +\code{list_datasets()} shows all of the datasets hosted on the phs open data platform. } \examples{ head(show_datasets()) From 422b61dee68183290b9c71f3b651280c01ec3c8d Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 14 May 2024 17:12:04 +0100 Subject: [PATCH 30/93] change file name --- R/{show_datasets.R => list_datasets.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{show_datasets.R => list_datasets.R} (100%) diff --git a/R/show_datasets.R b/R/list_datasets.R similarity index 100% rename from R/show_datasets.R rename to R/list_datasets.R From 180631d5fcf209046e4f3b01b96ee9c5faba4a83 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 14 May 2024 16:13:39 +0000 Subject: [PATCH 31/93] Update documentation --- man/list_datasets.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index 4ea688c..0c08ff5 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/show_datasets.R +% Please edit documentation in R/list_datasets.R \name{list_datasets} \alias{list_datasets} \title{Show all available datasets} From 8417553a69efc9bf7f65a750e7979340a9af8ba1 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 14 May 2024 17:18:47 +0100 Subject: [PATCH 32/93] make changes from show to list for list_resources --- R/list_datasets.R | 4 ++-- R/{show_resources.R => list_resources.R} | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) rename R/{show_resources.R => list_resources.R} (83%) diff --git a/R/list_datasets.R b/R/list_datasets.R index f225d83..d9685c8 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -1,4 +1,4 @@ -#' Show all available datasets +#' Lists all available datasets #' #' `list_datasets()` shows all of the datasets hosted on the phs open data platform. #' @@ -6,7 +6,7 @@ #' @export #' #' @examples -#' head(show_datasets()) +#' head(list_datasets()) #' list_datasets <- function() { data_sets <- phs_GET("package_list", "")$result %>% diff --git a/R/show_resources.R b/R/list_resources.R similarity index 83% rename from R/show_resources.R rename to R/list_resources.R index 5a84781..9f8ea8b 100644 --- a/R/show_resources.R +++ b/R/list_resources.R @@ -1,6 +1,6 @@ -#' Show all available resources for a dataset +#' Lists all available resources for a dataset #' -#' show_resources() returns all of the resources associated +#' list_resources() returns all of the resources associated #' with a dataset #' #' @inheritParams get_dataset @@ -9,8 +9,8 @@ #' @export #' #' @examples -#' show_resources("weekly-accident-and-emergency-activity-and-waiting-times") -show_resources <- function(dataset_name) { +#' list_resources("weekly-accident-and-emergency-activity-and-waiting-times") +list_resources <- function(dataset_name) { # throw error if name type/format is invalid check_dataset_name(dataset_name) From 5e7dc5fa92f6ab362e807355153ac8c40d1ca73e Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 14 May 2024 16:20:18 +0000 Subject: [PATCH 33/93] Update documentation --- NAMESPACE | 2 +- man/list_datasets.Rd | 4 ++-- man/{show_resources.Rd => list_resources.Rd} | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) rename man/{show_resources.Rd => list_resources.Rd} (50%) diff --git a/NAMESPACE b/NAMESPACE index 163072e..2fcaf55 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,5 +4,5 @@ export(get_dataset) export(get_resource) export(get_resource_sql) export(list_datasets) -export(show_resources) +export(list_resources) importFrom(magrittr,"%>%") diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index 0c08ff5..97f783c 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/list_datasets.R \name{list_datasets} \alias{list_datasets} -\title{Show all available datasets} +\title{Lists all available datasets} \usage{ list_datasets() } @@ -13,6 +13,6 @@ A character vector. \code{list_datasets()} shows all of the datasets hosted on the phs open data platform. } \examples{ -head(show_datasets()) +head(list_datasets()) } diff --git a/man/show_resources.Rd b/man/list_resources.Rd similarity index 50% rename from man/show_resources.Rd rename to man/list_resources.Rd index 5383dfe..4a04845 100644 --- a/man/show_resources.Rd +++ b/man/list_resources.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/show_resources.R -\name{show_resources} -\alias{show_resources} -\title{Show all available resources for a dataset} +% Please edit documentation in R/list_resources.R +\name{list_resources} +\alias{list_resources} +\title{Lists all available resources for a dataset} \usage{ -show_resources(dataset_name) +list_resources(dataset_name) } \arguments{ \item{dataset_name}{name of the dataset as found on @@ -14,9 +14,9 @@ show_resources(dataset_name) a \link[tibble:tibble-package]{tibble} with the data } \description{ -show_resources() returns all of the resources associated +list_resources() returns all of the resources associated with a dataset } \examples{ -show_resources("weekly-accident-and-emergency-activity-and-waiting-times") +list_resources("weekly-accident-and-emergency-activity-and-waiting-times") } From d3eb4d78c37b955ca9150b7c90d0e79c3674638e Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 16 May 2024 13:44:00 +0100 Subject: [PATCH 34/93] create empty get_datasets_additional_info file and function --- R/get_dataset_additional_info.R | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 R/get_dataset_additional_info.R diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R new file mode 100644 index 0000000..0ac8411 --- /dev/null +++ b/R/get_dataset_additional_info.R @@ -0,0 +1,4 @@ +get_datasets_additional_info <- function(dataset_name){ + +} + From 926ba598f49c774ca1339b49d5cc159d1bf0e81f Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 16 May 2024 14:01:38 +0100 Subject: [PATCH 35/93] write get_dataset_additional_info function --- R/get_dataset_additional_info.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 0ac8411..0ab1d37 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -1,4 +1,25 @@ get_datasets_additional_info <- function(dataset_name){ + query <- paste0("id=", dataset_name) + content <- phs_GET("package_show", query) + + amount_of_resources <- content$result$resources %>% + length() + + last_resource_created_date <- purrr::map(content$result$resources, ~.$created) %>% + unlist() %>% + max() + + last_resource_modified_date <- purrr::map(content$result$resources, ~.$last_modified) %>% + unlist() %>% + max() + + most_recent_resource_date <- c(last_resource_modified_date, last_resource_created_date) %>% + max() + + return_value <- list(amount_of_resources, most_recent_resource_date) + + return(return_value) } + From 55a8ab32b9ca4df1df1e050470daee29c414bec4 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 16 May 2024 15:11:18 +0100 Subject: [PATCH 36/93] create option for additional info in list datasets --- R/get_dataset_additional_info.R | 4 +++- R/list_datasets.R | 24 +++++++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 0ab1d37..05f6271 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -16,7 +16,9 @@ get_datasets_additional_info <- function(dataset_name){ most_recent_resource_date <- c(last_resource_modified_date, last_resource_created_date) %>% max() - return_value <- list(amount_of_resources, most_recent_resource_date) + return_value <- list("name" = dataset_name, + "amount_of_resources" = amount_of_resources, + "most_recent_resource_date" = most_recent_resource_date) return(return_value) diff --git a/R/list_datasets.R b/R/list_datasets.R index d9685c8..3ea158c 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -8,9 +8,27 @@ #' @examples #' head(list_datasets()) #' -list_datasets <- function() { - data_sets <- phs_GET("package_list", "")$result %>% +list_datasets <- function(include_additional_info = F) { + data_sets <- phs_GET("package_list", "")$result + if(!include_additional_info){ + data_sets%>% unlist() - return(data_sets) + return(data_sets)} + else{ + datasets <- purrr::map(data_sets, get_datasets_additional_info) + + return_value <- tibble::tibble(name = purrr::map_chr(datasets, ~.$name), + most_recent_resource_date = purrr::map_chr(datasets, ~.$most_recent_resource_date), + amount_of_resources = purrr::map_int(datasets, ~.$amount_of_resources)) %>% + dplyr::mutate(most_recent_resource_date = as.POSIXct(most_recent_resource_date, format = "%FT%X", tz = "UTC")) + + return(return_value) + } } + + + + + + From e2ca4476a49a8ff8087f10b224704b8896b6ce85 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 16 May 2024 15:21:22 +0100 Subject: [PATCH 37/93] add and update roxygen comments --- R/get_dataset_additional_info.R | 8 ++++++++ R/list_datasets.R | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 05f6271..406dc00 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -1,3 +1,11 @@ +#' returns a list of dataset names alongwith additional information, +#' such as the amount of resources and the date it was last updated +#' +#' @param dataset_name +#' +#' @return +#' @export +#' get_datasets_additional_info <- function(dataset_name){ query <- paste0("id=", dataset_name) content <- phs_GET("package_show", query) diff --git a/R/list_datasets.R b/R/list_datasets.R index 3ea158c..0afb6cf 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -2,11 +2,15 @@ #' #' `list_datasets()` shows all of the datasets hosted on the phs open data platform. #' +#' @param include_additional_info (optional) set to true to return the number +#' of resources per dataset and the date the dataset was last updated +#' #' @return A character vector. #' @export #' #' @examples #' head(list_datasets()) +#' head(list_datasets(include_additional_info = T)) #' list_datasets <- function(include_additional_info = F) { data_sets <- phs_GET("package_list", "")$result From 8b07eacf1936c1f4e975e1df3f3f0ef081495712 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Thu, 16 May 2024 14:24:04 +0000 Subject: [PATCH 38/93] Update documentation --- NAMESPACE | 1 + man/get_datasets_additional_info.Rd | 16 ++++++++++++++++ man/list_datasets.Rd | 7 ++++++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 man/get_datasets_additional_info.Rd diff --git a/NAMESPACE b/NAMESPACE index 2fcaf55..9ff8a3a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(get_dataset) +export(get_datasets_additional_info) export(get_resource) export(get_resource_sql) export(list_datasets) diff --git a/man/get_datasets_additional_info.Rd b/man/get_datasets_additional_info.Rd new file mode 100644 index 0000000..f126bfb --- /dev/null +++ b/man/get_datasets_additional_info.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_dataset_additional_info.R +\name{get_datasets_additional_info} +\alias{get_datasets_additional_info} +\title{returns a list of dataset names alongwith additional information, +such as the amount of resources and the date it was last updated} +\usage{ +get_datasets_additional_info(dataset_name) +} +\arguments{ +\item{dataset_name}{} +} +\description{ +returns a list of dataset names alongwith additional information, +such as the amount of resources and the date it was last updated +} diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index 97f783c..df9f63e 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -4,7 +4,11 @@ \alias{list_datasets} \title{Lists all available datasets} \usage{ -list_datasets() +list_datasets(include_additional_info = F) +} +\arguments{ +\item{include_additional_info}{(optional) set to true to return the number +of resources per dataset and the date the dataset was last updated} } \value{ A character vector. @@ -14,5 +18,6 @@ A character vector. } \examples{ head(list_datasets()) +head(list_datasets(include_additional_info = T)) } From 6fcb440e4f5985231f86f46610929b44bf300099 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 22 May 2024 12:20:00 +0100 Subject: [PATCH 39/93] remove export R/get_dataset_additional_info.R Co-authored-by: James McMahon --- R/get_dataset_additional_info.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 406dc00..bacb8f5 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -4,8 +4,6 @@ #' @param dataset_name #' #' @return -#' @export -#' get_datasets_additional_info <- function(dataset_name){ query <- paste0("id=", dataset_name) content <- phs_GET("package_show", query) From 0664388260dcd7e04e4a3d692e9edd72a17eaee1 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 22 May 2024 12:21:15 +0100 Subject: [PATCH 40/93] Update method of max date retrieval R/get_dataset_additional_info.R Co-authored-by: James McMahon --- R/get_dataset_additional_info.R | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index bacb8f5..bb37b62 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -11,16 +11,11 @@ get_datasets_additional_info <- function(dataset_name){ amount_of_resources <- content$result$resources %>% length() - last_resource_created_date <- purrr::map(content$result$resources, ~.$created) %>% - unlist() %>% - max() + last_resource_created_date <- purrr::map_chr(content$result$resources, ~.$created) - last_resource_modified_date <- purrr::map(content$result$resources, ~.$last_modified) %>% - unlist() %>% - max() + last_resource_modified_date <- purrr::map_chr(content$result$resources, ~.$last_modified) - most_recent_resource_date <- c(last_resource_modified_date, last_resource_created_date) %>% - max() + most_recent_resource_date <- max(last_resource_modified_date, last_resource_created_date) return_value <- list("name" = dataset_name, "amount_of_resources" = amount_of_resources, From 213584cba10083e05ec55a1c01bdf5a694d364aa Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 22 May 2024 11:22:25 +0000 Subject: [PATCH 41/93] Update documentation --- NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 9ff8a3a..2fcaf55 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export(get_dataset) -export(get_datasets_additional_info) export(get_resource) export(get_resource_sql) export(list_datasets) From 971600c66d7138bdc87fcbc144baa056caff386c Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 22 May 2024 12:37:49 +0100 Subject: [PATCH 42/93] use list instead of paste0 in R/get_dataset_additional_info.R Co-authored-by: James McMahon --- R/get_dataset_additional_info.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index bb37b62..22c356e 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -5,7 +5,7 @@ #' #' @return get_datasets_additional_info <- function(dataset_name){ - query <- paste0("id=", dataset_name) + query <- list("id" = dataset_name) content <- phs_GET("package_show", query) amount_of_resources <- content$result$resources %>% From 7b0f819f0ec3e364f104ca66be7c778cee1eca79 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 22 May 2024 12:38:56 +0100 Subject: [PATCH 43/93] Update R/list_datasets.R Co-authored-by: James McMahon --- R/list_datasets.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/list_datasets.R b/R/list_datasets.R index 0afb6cf..2eb257a 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -10,9 +10,8 @@ #' #' @examples #' head(list_datasets()) -#' head(list_datasets(include_additional_info = T)) -#' -list_datasets <- function(include_additional_info = F) { +#' head(list_datasets(include_additional_info = TRUE)) +list_datasets <- function(include_additional_info = FASLE) { data_sets <- phs_GET("package_list", "")$result if(!include_additional_info){ data_sets%>% From e56369dd3e821f38d6c826653afe30df38552965 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 22 May 2024 11:40:04 +0000 Subject: [PATCH 44/93] Update documentation --- man/list_datasets.Rd | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index df9f63e..e18d3dc 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -4,7 +4,7 @@ \alias{list_datasets} \title{Lists all available datasets} \usage{ -list_datasets(include_additional_info = F) +list_datasets(include_additional_info = FASLE) } \arguments{ \item{include_additional_info}{(optional) set to true to return the number @@ -18,6 +18,5 @@ A character vector. } \examples{ head(list_datasets()) -head(list_datasets(include_additional_info = T)) - +head(list_datasets(include_additional_info = TRUE)) } From a1f7466f7eaa3ea98287c4e0d0228e536802f1d4 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 22 May 2024 12:43:39 +0100 Subject: [PATCH 45/93] fix typo --- R/list_datasets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/list_datasets.R b/R/list_datasets.R index 2eb257a..1a58c4c 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -11,7 +11,7 @@ #' @examples #' head(list_datasets()) #' head(list_datasets(include_additional_info = TRUE)) -list_datasets <- function(include_additional_info = FASLE) { +list_datasets <- function(include_additional_info = FALSE) { data_sets <- phs_GET("package_list", "")$result if(!include_additional_info){ data_sets%>% From fe3d339bf0de14626c9ff3a1348e446cabcfad61 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 22 May 2024 13:06:24 +0100 Subject: [PATCH 46/93] list datasets to return a tibble when include_additional_info is FALSE --- R/list_datasets.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/list_datasets.R b/R/list_datasets.R index 1a58c4c..899cdb8 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -14,8 +14,8 @@ list_datasets <- function(include_additional_info = FALSE) { data_sets <- phs_GET("package_list", "")$result if(!include_additional_info){ - data_sets%>% - unlist() + data_sets <- tibble::tibble("name" = unlist(data_sets)) + return(data_sets)} else{ From eef0c76e4ead456655419dfe73b7b840dcf3f5f6 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 22 May 2024 12:15:32 +0000 Subject: [PATCH 47/93] Update documentation --- man/list_datasets.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index e18d3dc..e64b511 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -4,7 +4,7 @@ \alias{list_datasets} \title{Lists all available datasets} \usage{ -list_datasets(include_additional_info = FASLE) +list_datasets(include_additional_info = FALSE) } \arguments{ \item{include_additional_info}{(optional) set to true to return the number From 9b2acacceeee1ba669e7626358629d2c77a90e77 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 12 Jun 2024 17:00:28 +0100 Subject: [PATCH 48/93] typo --- R/get_latest_resource_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index cb1c99c..b7e317d 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -32,7 +32,7 @@ get_latest_resource_id <- function(dataset_name){ suggest_dataset_name(dataset_name) } - #sned the api request + #send the api request query <- list("id" = dataset_name) content <- try( phs_GET("package_show", query), From a226d4576dd6fada976194cd03ae20e73a3aa1e0 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 12 Jun 2024 17:47:43 +0100 Subject: [PATCH 49/93] add additional checking to get_latest_resourec_id by also checking if resource is first row --- R/get_latest_resource_id.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index b7e317d..8186548 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -53,15 +53,19 @@ get_latest_resource_id <- function(dataset_name){ created_date <- append(created_date, i$created) modified_date <- append(modified_date, i$last_modified) } - all_id_data <- list(id = id, + all_id_data <- tibble::tibble(id = id, created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), - modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) + modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) %>% + dplyr::mutate(most_recent_date_created = max(created_date)) + + all_id_data_first_row <- all_id_data %>% + dplyr::slice(1) + + if(all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created){ + return(all_id_data_first_row$id) + }else(warning("most recent id could not be identified")) - #filter for the id with the most recent created date - df <- data.frame(all_id_data) %>% - subset(created_date == max(created_date)) - return(df$id) } From d8cb21ef7a2c955d92b85f5f29f9fba45c95398c Mon Sep 17 00:00:00 2001 From: ross-hull Date: Fri, 14 Jun 2024 15:20:11 +0000 Subject: [PATCH 50/93] Style code (GHA) --- R/get_dataset_additional_info.R | 17 ++++++++--------- R/list_datasets.R | 20 ++++++++------------ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 22c356e..bdd7741 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -4,25 +4,24 @@ #' @param dataset_name #' #' @return -get_datasets_additional_info <- function(dataset_name){ +get_datasets_additional_info <- function(dataset_name) { query <- list("id" = dataset_name) content <- phs_GET("package_show", query) amount_of_resources <- content$result$resources %>% length() - last_resource_created_date <- purrr::map_chr(content$result$resources, ~.$created) + last_resource_created_date <- purrr::map_chr(content$result$resources, ~ .$created) - last_resource_modified_date <- purrr::map_chr(content$result$resources, ~.$last_modified) + last_resource_modified_date <- purrr::map_chr(content$result$resources, ~ .$last_modified) most_recent_resource_date <- max(last_resource_modified_date, last_resource_created_date) - return_value <- list("name" = dataset_name, - "amount_of_resources" = amount_of_resources, - "most_recent_resource_date" = most_recent_resource_date) + return_value <- list( + "name" = dataset_name, + "amount_of_resources" = amount_of_resources, + "most_recent_resource_date" = most_recent_resource_date + ) return(return_value) - } - - diff --git a/R/list_datasets.R b/R/list_datasets.R index 899cdb8..8963f45 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -13,25 +13,21 @@ #' head(list_datasets(include_additional_info = TRUE)) list_datasets <- function(include_additional_info = FALSE) { data_sets <- phs_GET("package_list", "")$result - if(!include_additional_info){ + if (!include_additional_info) { data_sets <- tibble::tibble("name" = unlist(data_sets)) - return(data_sets)} - else{ + return(data_sets) + } else { datasets <- purrr::map(data_sets, get_datasets_additional_info) - return_value <- tibble::tibble(name = purrr::map_chr(datasets, ~.$name), - most_recent_resource_date = purrr::map_chr(datasets, ~.$most_recent_resource_date), - amount_of_resources = purrr::map_int(datasets, ~.$amount_of_resources)) %>% + return_value <- tibble::tibble( + name = purrr::map_chr(datasets, ~ .$name), + most_recent_resource_date = purrr::map_chr(datasets, ~ .$most_recent_resource_date), + amount_of_resources = purrr::map_int(datasets, ~ .$amount_of_resources) + ) %>% dplyr::mutate(most_recent_resource_date = as.POSIXct(most_recent_resource_date, format = "%FT%X", tz = "UTC")) return(return_value) } } - - - - - - From 7ae5e0456d749221477d9145aa73a6cdfdd472de Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 14 Jun 2024 16:48:54 +0100 Subject: [PATCH 51/93] remove include additional info parameter from list datasets --- R/list_datasets.R | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/R/list_datasets.R b/R/list_datasets.R index 899cdb8..6c9bf11 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -2,33 +2,18 @@ #' #' `list_datasets()` shows all of the datasets hosted on the phs open data platform. #' -#' @param include_additional_info (optional) set to true to return the number -#' of resources per dataset and the date the dataset was last updated -#' -#' @return A character vector. +#' @return A tibble. #' @export #' #' @examples #' head(list_datasets()) -#' head(list_datasets(include_additional_info = TRUE)) -list_datasets <- function(include_additional_info = FALSE) { +list_datasets <- function() { data_sets <- phs_GET("package_list", "")$result - if(!include_additional_info){ - data_sets <- tibble::tibble("name" = unlist(data_sets)) + data_sets <- tibble::tibble("name" = unlist(data_sets)) return(data_sets)} - else{ - datasets <- purrr::map(data_sets, get_datasets_additional_info) - - return_value <- tibble::tibble(name = purrr::map_chr(datasets, ~.$name), - most_recent_resource_date = purrr::map_chr(datasets, ~.$most_recent_resource_date), - amount_of_resources = purrr::map_int(datasets, ~.$amount_of_resources)) %>% - dplyr::mutate(most_recent_resource_date = as.POSIXct(most_recent_resource_date, format = "%FT%X", tz = "UTC")) - return(return_value) - } -} From bf62035aebd1a42e1cb6cdf719a4c08d40f6cdcd Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 19 Jun 2024 15:29:03 +0100 Subject: [PATCH 52/93] Add bullet to NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index b7ce93b..5bad634 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,8 @@ dates to returned data (#24). - `get_dataset()` will now suggest multiple dataset names, when the dataset you've asked for doesn't exist (i.e. there's a typo) and there are multiple likely candidates (#28). +- Two new functions `list_datasets()` and `list_resources()` allow browsing +available datasets and resources (#10). # phsopendata 0.1.0 (2021-07-22) From 2ae53d9a1b162874a5d215ab94d3de4d81082a8b Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 19 Jun 2024 15:31:22 +0100 Subject: [PATCH 53/93] Add Ross (@ross-hull) as an package author --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f328f7b..8f04ce9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,8 @@ Version: 0.1.0.9000 Authors@R: c( person("Csilla", "Scharle", , "csilla.scharle2@phs.scot", role = c("aut", "cre")), person("James", "McMahon", , "james.mcmahon@phs.scot", role = "aut"), - person("David", "Aikman", , "david.aikman@phs.scot", role = "aut") + person("David", "Aikman", , "david.aikman@phs.scot", role = "aut"), + person("Ross", "Hull", , "ross.hull2@phs.scot", role = "aut") ) Description: Functions to extract and interact with data from the Scottish Health and Social Care Open Data platform. From db691aa6dce2422760b417b1d659bb534e73691f Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:29:36 +0100 Subject: [PATCH 54/93] refine get_dataset_additional_info --- R/get_dataset_additional_info.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 22c356e..7a9d1bd 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -4,6 +4,7 @@ #' @param dataset_name #' #' @return +#' @export get_datasets_additional_info <- function(dataset_name){ query <- list("id" = dataset_name) content <- phs_GET("package_show", query) @@ -15,11 +16,12 @@ get_datasets_additional_info <- function(dataset_name){ last_resource_modified_date <- purrr::map_chr(content$result$resources, ~.$last_modified) - most_recent_resource_date <- max(last_resource_modified_date, last_resource_created_date) + most_recent_resource_date <- max(last_resource_modified_date, last_resource_created_date) %>% + as.POSIXct(format = "%FT%X", tz = "UTC") - return_value <- list("name" = dataset_name, + return_value <- tibble::tibble("name" = dataset_name, "amount_of_resources" = amount_of_resources, - "most_recent_resource_date" = most_recent_resource_date) + "most_recent_resource_update" = most_recent_resource_date) return(return_value) From c1978c05ef0f26524f0fc6066243baf9e383c328 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:36:38 +0100 Subject: [PATCH 55/93] convert dates from string to date times --- R/list_resources.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/list_resources.R b/R/list_resources.R index 9f8ea8b..9685988 100644 --- a/R/list_resources.R +++ b/R/list_resources.R @@ -30,8 +30,10 @@ list_resources <- function(dataset_name) { # define list of resource IDs and names within dataset all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) all_names <- purrr::map_chr(content$result$resources, ~ .x$name) - all_date_created <- purrr::map_chr(content$result$resources, ~ .x$created) - all_date_modified <- purrr::map_chr(content$result$resources, ~ .x$last_modified) + all_date_created <- purrr::map_chr(content$result$resources, ~ .x$created) %>% + as.POSIXct(format = "%FT%X", tz = "UTC") + all_date_modified <- purrr::map_chr(content$result$resources, ~ .x$last_modified) %>% + as.POSIXct(format = "%FT%X", tz = "UTC") return_value <- tibble::tibble( "res_id" = all_ids, "name" = all_names, "created" = all_date_created, "last_modified" = all_date_modified From b7a1f138b7e78c8bde01602262bd46922bf92ca8 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:42:10 +0100 Subject: [PATCH 56/93] typo --- R/get_dataset_additional_info.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 7a9d1bd..ab15ee9 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -5,7 +5,7 @@ #' #' @return #' @export -get_datasets_additional_info <- function(dataset_name){ +get_dataset_additional_info <- function(dataset_name){ query <- list("id" = dataset_name) content <- phs_GET("package_show", query) From dede5fb6826f5e9bda824067bb9c1748b8f6a65b Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:43:02 +0100 Subject: [PATCH 57/93] comment code --- R/list_datasets.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/list_datasets.R b/R/list_datasets.R index 6c9bf11..46da19c 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -8,6 +8,7 @@ #' @examples #' head(list_datasets()) list_datasets <- function() { + # fetch the data data_sets <- phs_GET("package_list", "")$result data_sets <- tibble::tibble("name" = unlist(data_sets)) From 2c64bfff1a667174ec20618e1800347bf47a336a Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:48:40 +0100 Subject: [PATCH 58/93] comment code --- R/get_dataset_additional_info.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index ab15ee9..b5666a2 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -1,24 +1,29 @@ #' returns a list of dataset names alongwith additional information, #' such as the amount of resources and the date it was last updated #' -#' @param dataset_name +#' @inheritParams get_dataset #' -#' @return +#' @return a [tibble][tibble::tibble-package] with the data #' @export get_dataset_additional_info <- function(dataset_name){ + #define query query <- list("id" = dataset_name) + #fetch the data content <- phs_GET("package_show", query) + #get the amount of resources amount_of_resources <- content$result$resources %>% length() + #get the last recourse created and modified dates last_resource_created_date <- purrr::map_chr(content$result$resources, ~.$created) - last_resource_modified_date <- purrr::map_chr(content$result$resources, ~.$last_modified) + #get the latest between the created and modified dates and change to datetime format most_recent_resource_date <- max(last_resource_modified_date, last_resource_created_date) %>% as.POSIXct(format = "%FT%X", tz = "UTC") + #create tibble to return return_value <- tibble::tibble("name" = dataset_name, "amount_of_resources" = amount_of_resources, "most_recent_resource_update" = most_recent_resource_date) From 4bb18987dccfe0e6f78a46e2b341705ef871eef6 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:49:39 +0100 Subject: [PATCH 59/93] comment --- R/list_resources.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/list_resources.R b/R/list_resources.R index 9685988..732129a 100644 --- a/R/list_resources.R +++ b/R/list_resources.R @@ -27,7 +27,7 @@ list_resources <- function(dataset_name) { suggest_dataset_name(dataset_name) } - # define list of resource IDs and names within dataset + # define list of resource IDs names date created and date modified within dataset all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) all_names <- purrr::map_chr(content$result$resources, ~ .x$name) all_date_created <- purrr::map_chr(content$result$resources, ~ .x$created) %>% From 4df6a1c411b344941b53e490d47ffad385967551 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 15:52:38 +0100 Subject: [PATCH 60/93] add example to get_dataset_additional_info --- R/get_dataset_additional_info.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index b5666a2..912db94 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -5,6 +5,9 @@ #' #' @return a [tibble][tibble::tibble-package] with the data #' @export +#' @example +#' get_dataset("gp-practice-populations") +#' get_dataset_additional_info <- function(dataset_name){ #define query query <- list("id" = dataset_name) From 5a15a3bb4b84b88d18a4e2ac993768a471ab438c Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 16:26:08 +0100 Subject: [PATCH 61/93] delete mistake empty file --- R/test-get_latest_resource_id.R | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 R/test-get_latest_resource_id.R diff --git a/R/test-get_latest_resource_id.R b/R/test-get_latest_resource_id.R deleted file mode 100644 index 4d3ff80..0000000 --- a/R/test-get_latest_resource_id.R +++ /dev/null @@ -1,21 +0,0 @@ -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# RStudio Workbench is strictly for use by Public Health Scotland staff and -# authorised users only, and is governed by the Acceptable Usage Policy https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_acceptable_use_policy.md. -# -# This is a shared resource and is hosted on a pay-as-you-go cloud computing -# platform. Your usage will incur direct financial cost to Public Health -# Scotland. As such, please ensure -# -# 1. that this session is appropriately sized with the minimum number of CPUs -# and memory required for the size and scale of your analysis; -# 2. the code you write in this script is optimal and only writes out the -# data required, nothing more. -# 3. you close this session when not in use; idle sessions still cost PHS -# money! -# -# For further guidance, please see https://github.com/Public-Health-Scotland/R-Resources/blob/master/posit_workbench_best_practice_with_r.md. -# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - From bc50e49abf8d3c55deba7a0c3d4b7bb5a908afda Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 16:44:15 +0100 Subject: [PATCH 62/93] add documentation --- NAMESPACE | 2 + R/get_dataset_additional_info.R | 7 +-- R/get_latest_resource.R | 66 +++++++++++++++++++++++++++++ R/get_latest_resource_id.R | 25 ++++++++++- R/list_resources.R | 2 +- man/get_dataset_additional_info.Rd | 22 ++++++++++ man/get_datasets_additional_info.Rd | 16 ------- man/get_latest_resource.Rd | 66 +++++++++++++++++++++++++++++ man/get_latest_resource_id.Rd | 29 +++++++++++++ man/list_datasets.Rd | 9 +--- man/list_resources.Rd | 2 +- 11 files changed, 217 insertions(+), 29 deletions(-) create mode 100644 R/get_latest_resource.R create mode 100644 man/get_dataset_additional_info.Rd delete mode 100644 man/get_datasets_additional_info.Rd create mode 100644 man/get_latest_resource.Rd create mode 100644 man/get_latest_resource_id.Rd diff --git a/NAMESPACE b/NAMESPACE index 9f9af6f..d8d8d4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ export("%>%") export(get_dataset) +export(get_dataset_additional_info) +export(get_latest_resource) export(get_resource) export(get_resource_sql) export(list_datasets) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 912db94..1faec95 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -1,13 +1,14 @@ -#' returns a list of dataset names alongwith additional information, +#' get a datasets additional info +#' +#' `get_dataset_additional_info()`returns a list of dataset names alongwith additional information, #' such as the amount of resources and the date it was last updated #' #' @inheritParams get_dataset #' #' @return a [tibble][tibble::tibble-package] with the data #' @export -#' @example +#' @examples #' get_dataset("gp-practice-populations") -#' get_dataset_additional_info <- function(dataset_name){ #define query query <- list("id" = dataset_name) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R new file mode 100644 index 0000000..f499866 --- /dev/null +++ b/R/get_latest_resource.R @@ -0,0 +1,66 @@ +#' Get the latest resource from a data set +#' +#' There are some datasets on the open data platform that +#' keep historic resources instead of updating existing ones. +#' For these it is useful to be able to retrieve the latest +#' resource. These data sets include: +#' +#' gp-practice-populations +#' gp-practice-contact-details-and-list-size +#' nhsscotland-payments-to-general-practice, +#' dental-practices-and-patient-registrations, +#' general-practitioner-contact-details, +#' prescribed-dispensed, +#' prescriptions-in-the-community, +#' community-pharmacy-contractor-activity +#' +#' @param dataset_name name of the dataset as found on +#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform} +#' @param rows (optional) specify the max number of rows to return. +#' @param row_filters (optional) a named list or vector that specifies values of +#' columns/fields to keep. +#' e.g. list(Date = 20220216, Sex = "Female"). +#' @param col_select (optional) a character vector containing the names of +#' desired columns/fields. +#' e.g. c("Date", "Sex"). +#' @param include_context (optional) If `TRUE` additional information about the +#' resource will be added as columns to the data, including the resource ID, the +#' resource name, the creation date and the last modified/updated date. +#' +#' @return a [tibble][tibble::tibble-package] with the data +#' @export +#' +#' @examples +#' dataset_name <- "gp-practice-contact-details-and-list-sizes" +#' +#' data <- get_latest_resource(dataset_name) +#' +#' filters <- list("Postcode" = "DD11 1ES") +#' wanted_cols <- c("Practice", "Postcode", "Dispensing") +#' +#' filtered_data <- get_latest_resource( +#' dataset_name = dataset_name, +#' row_filters = filters, +#' col_select = wanted_cols +#' ) +#' +get_latest_resource <- function(dataset_name, + rows = NULL, + row_filters = NULL, + col_select = NULL, + include_context = FALSE){ + #get the latest resource id + id <- get_latest_resource_id(dataset_name) + + return_value <- get_resource(id, + rows, + row_filters, + col_select, + include_context) +} + + + + + + diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 8186548..9e64433 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -1,3 +1,22 @@ +#' get_latest_resource_id +#' +#' to be confident that the resource returned is the one intended +#' two conditions have to be met. It has to appear at the top of +#' of the resource list as shown on the open data platform. +#' The order they are returned via the api is the same +#' as they appear on the open data platfrom. It also +#' has to have the most recent date created +#' +#' There are only some datasets that this functionality +#' is relevent to, these are listed within applicable +#' datasets and are the datasets that keep historic +#' resources instead of over writting them +#' +#' @inheritParams get_dataset +#' +#' @return a [tibble][tibble::tibble-package] with the data +#' + get_latest_resource_id <- function(dataset_name){ applicable_datasets <- c("gp-practice-populations", "gp-practice-contact-details-and-list-sizes", "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", @@ -20,7 +39,7 @@ get_latest_resource_id <- function(dataset_name){ if(!dataset_name %in% applicable_datasets){ cli::cli_abort(c( "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets", - "x" = "Please see documentation.", + "x" = "Please see get_latest_reource documentation.", "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." )) @@ -58,9 +77,13 @@ get_latest_resource_id <- function(dataset_name){ modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) %>% dplyr::mutate(most_recent_date_created = max(created_date)) + #get the first row of the rources, this will be the same that appears on the top + #on the open data platform all_id_data_first_row <- all_id_data %>% dplyr::slice(1) + #if the resource at the top as appearing on the open data platform also has the most + #recent date created, return it. Otherwise return warning if(all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created){ return(all_id_data_first_row$id) }else(warning("most recent id could not be identified")) diff --git a/R/list_resources.R b/R/list_resources.R index 732129a..0a0a709 100644 --- a/R/list_resources.R +++ b/R/list_resources.R @@ -1,6 +1,6 @@ #' Lists all available resources for a dataset #' -#' list_resources() returns all of the resources associated +#' `list_resources()` returns all of the resources associated #' with a dataset #' #' @inheritParams get_dataset diff --git a/man/get_dataset_additional_info.Rd b/man/get_dataset_additional_info.Rd new file mode 100644 index 0000000..675faff --- /dev/null +++ b/man/get_dataset_additional_info.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_dataset_additional_info.R +\name{get_dataset_additional_info} +\alias{get_dataset_additional_info} +\title{get a datasets additional info} +\usage{ +get_dataset_additional_info(dataset_name) +} +\arguments{ +\item{dataset_name}{name of the dataset as found on +\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} +} +\value{ +a \link[tibble:tibble-package]{tibble} with the data +} +\description{ +\code{get_dataset_additional_info()}returns a list of dataset names alongwith additional information, +such as the amount of resources and the date it was last updated +} +\examples{ +get_dataset("gp-practice-populations") +} diff --git a/man/get_datasets_additional_info.Rd b/man/get_datasets_additional_info.Rd deleted file mode 100644 index f126bfb..0000000 --- a/man/get_datasets_additional_info.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_dataset_additional_info.R -\name{get_datasets_additional_info} -\alias{get_datasets_additional_info} -\title{returns a list of dataset names alongwith additional information, -such as the amount of resources and the date it was last updated} -\usage{ -get_datasets_additional_info(dataset_name) -} -\arguments{ -\item{dataset_name}{} -} -\description{ -returns a list of dataset names alongwith additional information, -such as the amount of resources and the date it was last updated -} diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd new file mode 100644 index 0000000..0ccc8c4 --- /dev/null +++ b/man/get_latest_resource.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_latest_resource.R +\name{get_latest_resource} +\alias{get_latest_resource} +\title{Get the latest resource from a data set} +\usage{ +get_latest_resource( + dataset_name, + rows = NULL, + row_filters = NULL, + col_select = NULL, + include_context = FALSE +) +} +\arguments{ +\item{dataset_name}{name of the dataset as found on +\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} + +\item{rows}{(optional) specify the max number of rows to return.} + +\item{row_filters}{(optional) a named list or vector that specifies values of +columns/fields to keep. +e.g. list(Date = 20220216, Sex = "Female").} + +\item{col_select}{(optional) a character vector containing the names of +desired columns/fields. +e.g. c("Date", "Sex").} + +\item{include_context}{(optional) If \code{TRUE} additional information about the +resource will be added as columns to the data, including the resource ID, the +resource name, the creation date and the last modified/updated date.} +} +\value{ +a \link[tibble:tibble-package]{tibble} with the data +} +\description{ +There are some datasets on the open data platform that +keep historic resources instead of updating existing ones. +For these it is useful to be able to retrieve the latest +resource. These data sets include: +} +\details{ +gp-practice-populations +gp-practice-contact-details-and-list-size +nhsscotland-payments-to-general-practice, +dental-practices-and-patient-registrations, +general-practitioner-contact-details, +prescribed-dispensed, +prescriptions-in-the-community, +community-pharmacy-contractor-activity +} +\examples{ +dataset_name <- "gp-practice-contact-details-and-list-sizes" + +data <- get_latest_resource(dataset_name) + +filters <- list("Postcode" = "DD11 1ES") +wanted_cols <- c("Practice", "Postcode", "Dispensing") + +filtered_data <- get_latest_resource( + dataset_name = dataset_name, + row_filters = filters, + col_select = wanted_cols +) + +} diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd new file mode 100644 index 0000000..c2c1f20 --- /dev/null +++ b/man/get_latest_resource_id.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_latest_resource_id.R +\name{get_latest_resource_id} +\alias{get_latest_resource_id} +\title{get_latest_resource_id} +\usage{ +get_latest_resource_id(dataset_name) +} +\arguments{ +\item{dataset_name}{name of the dataset as found on +\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} +} +\value{ +a \link[tibble:tibble-package]{tibble} with the data +} +\description{ +to be confident that the resource returned is the one intended +two conditions have to be met. It has to appear at the top of +of the resource list as shown on the open data platform. +The order they are returned via the api is the same +as they appear on the open data platfrom. It also +has to have the most recent date created +} +\details{ +There are only some datasets that this functionality +is relevent to, these are listed within applicable +datasets and are the datasets that keep historic +resources instead of over writting them +} diff --git a/man/list_datasets.Rd b/man/list_datasets.Rd index e64b511..02ff91f 100644 --- a/man/list_datasets.Rd +++ b/man/list_datasets.Rd @@ -4,19 +4,14 @@ \alias{list_datasets} \title{Lists all available datasets} \usage{ -list_datasets(include_additional_info = FALSE) -} -\arguments{ -\item{include_additional_info}{(optional) set to true to return the number -of resources per dataset and the date the dataset was last updated} +list_datasets() } \value{ -A character vector. +A tibble. } \description{ \code{list_datasets()} shows all of the datasets hosted on the phs open data platform. } \examples{ head(list_datasets()) -head(list_datasets(include_additional_info = TRUE)) } diff --git a/man/list_resources.Rd b/man/list_resources.Rd index 4a04845..30767f9 100644 --- a/man/list_resources.Rd +++ b/man/list_resources.Rd @@ -14,7 +14,7 @@ list_resources(dataset_name) a \link[tibble:tibble-package]{tibble} with the data } \description{ -list_resources() returns all of the resources associated +\code{list_resources()} returns all of the resources associated with a dataset } \examples{ From 8142c5e6e1f5e64b77e0fc0d35b354ee7822b192 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 20 Jun 2024 16:52:38 +0100 Subject: [PATCH 63/93] typo in example --- R/get_latest_resource.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index f499866..0977e53 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -36,7 +36,7 @@ #' data <- get_latest_resource(dataset_name) #' #' filters <- list("Postcode" = "DD11 1ES") -#' wanted_cols <- c("Practice", "Postcode", "Dispensing") +#' wanted_cols <- c("PracticeCode", "Postcode", "Dispensing") #' #' filtered_data <- get_latest_resource( #' dataset_name = dataset_name, From 975cf6231422d8f0f00abacb6c7060fe5b8f88de Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 3 Jul 2024 11:05:22 +0000 Subject: [PATCH 64/93] Update documentation --- DESCRIPTION | 2 +- man/get_latest_resource.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8f04ce9..1ded194 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,4 +33,4 @@ Config/testthat/parallel: true Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index 0ccc8c4..0a9058f 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -55,7 +55,7 @@ dataset_name <- "gp-practice-contact-details-and-list-sizes" data <- get_latest_resource(dataset_name) filters <- list("Postcode" = "DD11 1ES") -wanted_cols <- c("Practice", "Postcode", "Dispensing") +wanted_cols <- c("PracticeCode", "Postcode", "Dispensing") filtered_data <- get_latest_resource( dataset_name = dataset_name, From afd94aa7d2ecc459ccc9a92838801c1570a9c803 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 3 Jul 2024 15:18:07 +0100 Subject: [PATCH 65/93] add simple test for list_datasets.R --- tests/testthat/test-list_datasets.R | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/testthat/test-list_datasets.R diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R new file mode 100644 index 0000000..852acee --- /dev/null +++ b/tests/testthat/test-list_datasets.R @@ -0,0 +1,6 @@ +test_that("returns more than 0 datasets", { + #select the first row of the tibble and get the + #number of rows. If no datasets were returned + #this will be 0 + expect_equal(nrow(slice(list_datasets(), 1)), 1) +}) From 6bafa200e372c504e9c04d02f0258811c0c72e9e Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 3 Jul 2024 14:20:04 +0000 Subject: [PATCH 66/93] Style code (GHA) --- R/get_dataset_additional_info.R | 26 +++++++------ R/get_latest_resource.R | 22 +++++------ R/get_latest_resource_id.R | 60 ++++++++++++++--------------- R/list_datasets.R | 6 +-- tests/testthat/test-list_datasets.R | 6 +-- 5 files changed, 56 insertions(+), 64 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 5907898..679a118 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -9,31 +9,33 @@ #' @export #' @examples #' get_dataset("gp-practice-populations") -get_dataset_additional_info <- function(dataset_name){ - #define query +get_dataset_additional_info <- function(dataset_name) { + # define query query <- list("id" = dataset_name) - #fetch the data + # fetch the data content <- phs_GET("package_show", query) - #get the amount of resources + # get the amount of resources amount_of_resources <- content$result$resources %>% length() - #get the last recourse created and modified dates - last_resource_created_date <- purrr::map_chr(content$result$resources, ~.$created) - last_resource_modified_date <- purrr::map_chr(content$result$resources, ~.$last_modified) + # get the last recourse created and modified dates + last_resource_created_date <- purrr::map_chr(content$result$resources, ~ .$created) + last_resource_modified_date <- purrr::map_chr(content$result$resources, ~ .$last_modified) - #get the latest between the created and modified dates and change to datetime format + # get the latest between the created and modified dates and change to datetime format most_recent_resource_date <- max(last_resource_modified_date, last_resource_created_date) %>% as.POSIXct(format = "%FT%X", tz = "UTC") - #create tibble to return - return_value <- tibble::tibble("name" = dataset_name, - "amount_of_resources" = amount_of_resources, - "most_recent_resource_update" = most_recent_resource_date) + # create tibble to return + return_value <- tibble::tibble( + "name" = dataset_name, + "amount_of_resources" = amount_of_resources, + "most_recent_resource_update" = most_recent_resource_date + ) return(return_value) } diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 0977e53..b760e48 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -48,19 +48,15 @@ get_latest_resource <- function(dataset_name, rows = NULL, row_filters = NULL, col_select = NULL, - include_context = FALSE){ - #get the latest resource id + include_context = FALSE) { + # get the latest resource id id <- get_latest_resource_id(dataset_name) - return_value <- get_resource(id, - rows, - row_filters, - col_select, - include_context) + return_value <- get_resource( + id, + rows, + row_filters, + col_select, + include_context + ) } - - - - - - diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 9e64433..c22b4d2 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -17,11 +17,13 @@ #' @return a [tibble][tibble::tibble-package] with the data #' -get_latest_resource_id <- function(dataset_name){ - applicable_datasets <- c("gp-practice-populations", "gp-practice-contact-details-and-list-sizes", - "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", - "general-practitioner-contact-details", "prescribed-dispensed", - "prescriptions-in-the-community", "community-pharmacy-contractor-activity") +get_latest_resource_id <- function(dataset_name) { + applicable_datasets <- c( + "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", + "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", + "general-practitioner-contact-details", "prescribed-dispensed", + "prescriptions-in-the-community", "community-pharmacy-contractor-activity" + ) # throw error if name type/format is invalid check_dataset_name(dataset_name) @@ -34,9 +36,9 @@ get_latest_resource_id <- function(dataset_name){ ) - #check if data set is within applicable datasets - #throw error if not - if(!dataset_name %in% applicable_datasets){ + # check if data set is within applicable datasets + # throw error if not + if (!dataset_name %in% applicable_datasets) { cli::cli_abort(c( "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets", "x" = "Please see get_latest_reource documentation.", @@ -51,50 +53,44 @@ get_latest_resource_id <- function(dataset_name){ suggest_dataset_name(dataset_name) } - #send the api request + # send the api request query <- list("id" = dataset_name) content <- try( phs_GET("package_show", query), silent = TRUE ) - #retrieve the resource id's from returned contect + # retrieve the resource id's from returned contect all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) - #add the id, created date and last_modified to a dataframe + # add the id, created date and last_modified to a dataframe id <- c() created_date <- c() modified_date <- c() - for(i in content$result$resources){ - id <- append(id, i$id) + for (i in content$result$resources) { + id <- append(id, i$id) created_date <- append(created_date, i$created) modified_date <- append(modified_date, i$last_modified) } - all_id_data <- tibble::tibble(id = id, - created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), - modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC")) %>% + all_id_data <- tibble::tibble( + id = id, + created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), + modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC") + ) %>% dplyr::mutate(most_recent_date_created = max(created_date)) - #get the first row of the rources, this will be the same that appears on the top - #on the open data platform + # get the first row of the rources, this will be the same that appears on the top + # on the open data platform all_id_data_first_row <- all_id_data %>% dplyr::slice(1) - #if the resource at the top as appearing on the open data platform also has the most - #recent date created, return it. Otherwise return warning - if(all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created){ + # if the resource at the top as appearing on the open data platform also has the most + # recent date created, return it. Otherwise return warning + if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) { return(all_id_data_first_row$id) - }else(warning("most recent id could not be identified")) - - - + } else { + (warning("most recent id could not be identified")) + } } - - - - - - - diff --git a/R/list_datasets.R b/R/list_datasets.R index e640cfd..70f9d8e 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -14,7 +14,5 @@ list_datasets <- function() { data_sets <- tibble::tibble("name" = unlist(data_sets)) - return(data_sets)} - - - + return(data_sets) +} diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index 852acee..497608b 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -1,6 +1,6 @@ test_that("returns more than 0 datasets", { - #select the first row of the tibble and get the - #number of rows. If no datasets were returned - #this will be 0 + # select the first row of the tibble and get the + # number of rows. If no datasets were returned + # this will be 0 expect_equal(nrow(slice(list_datasets(), 1)), 1) }) From 063a84b902cd6a876330de8741a4f52f5607e2f0 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 3 Jul 2024 15:43:07 +0100 Subject: [PATCH 67/93] add test to test-list_datasets.R --- tests/testthat/test-list_datasets.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index 497608b..0252d7a 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -4,3 +4,7 @@ test_that("returns more than 0 datasets", { # this will be 0 expect_equal(nrow(slice(list_datasets(), 1)), 1) }) + +test_that("returns data in the expected format", { + expect_s3_class(list_datasets(), "tbl_df") +}) From 9e5b32af0cc45a96bf6e7ac348c405fbffe69d0e Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 3 Jul 2024 15:46:45 +0100 Subject: [PATCH 68/93] create test for list_resources.R --- tests/testthat/test-list_resources.R | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/testthat/test-list_resources.R diff --git a/tests/testthat/test-list_resources.R b/tests/testthat/test-list_resources.R new file mode 100644 index 0000000..4c70f51 --- /dev/null +++ b/tests/testthat/test-list_resources.R @@ -0,0 +1,5 @@ +test_that("returns data in the expected format", { + expect_s3_class(list_datasets(), "tbl_df") +}) + + From bdd2391f79cfd3c9be09ac8acf8d1efab0129a34 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 3 Jul 2024 15:55:06 +0100 Subject: [PATCH 69/93] update documentation mistake --- R/get_latest_resource_id.R | 2 +- man/get_latest_resource_id.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index c22b4d2..0d61d3c 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -14,7 +14,7 @@ #' #' @inheritParams get_dataset #' -#' @return a [tibble][tibble::tibble-package] with the data +#' @return a [string] with the resource id #' get_latest_resource_id <- function(dataset_name) { diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd index c2c1f20..28944de 100644 --- a/man/get_latest_resource_id.Rd +++ b/man/get_latest_resource_id.Rd @@ -11,7 +11,7 @@ get_latest_resource_id(dataset_name) \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} } \value{ -a \link[tibble:tibble-package]{tibble} with the data +a \link{string} with the resource id } \description{ to be confident that the resource returned is the one intended From 1653bfadda95e8aef325a9b2f4b07932dd86df22 Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 3 Jul 2024 16:05:39 +0100 Subject: [PATCH 70/93] add 2 basic tests for get_latest_resource_id.R --- tests/testthat/test-get_latest_resource_id.R | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/testthat/test-get_latest_resource_id.R diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R new file mode 100644 index 0000000..c2e7569 --- /dev/null +++ b/tests/testthat/test-get_latest_resource_id.R @@ -0,0 +1,7 @@ +test_that("returns data for a dataset that is listed", { + expect_no_error(get_latest_resource_id("gp-practice-populations")) +}) + +test_that("returns error for a dataset that is not listed", { + expect_error(get_latest_resource_id("hospital-codes")) +}) From b275ec791a17b5e66cce93916476bcd2a8897667 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 3 Jul 2024 15:07:10 +0000 Subject: [PATCH 71/93] Style code (GHA) --- tests/testthat/test-list_resources.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-list_resources.R b/tests/testthat/test-list_resources.R index 4c70f51..5434300 100644 --- a/tests/testthat/test-list_resources.R +++ b/tests/testthat/test-list_resources.R @@ -1,5 +1,3 @@ test_that("returns data in the expected format", { expect_s3_class(list_datasets(), "tbl_df") }) - - From 1f630b5abeeebec04a864d9ffd055d853b847d18 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 4 Jul 2024 11:50:32 +0100 Subject: [PATCH 72/93] Fix some typos and format the documentation --- R/get_dataset_additional_info.R | 5 +++-- R/get_latest_resource.R | 16 ++++++++-------- R/get_latest_resource_id.R | 8 +++----- man/get_dataset_additional_info.Rd | 5 +++-- man/get_latest_resource.Rd | 18 ++++++++++-------- man/get_latest_resource_id.Rd | 6 +++--- 6 files changed, 30 insertions(+), 28 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 679a118..ebf5124 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -1,7 +1,8 @@ #' get a datasets additional info #' -#' `get_dataset_additional_info()`returns a list of dataset names alongwith additional information, -#' such as the amount of resources and the date it was last updated +#' `get_dataset_additional_info()` returns a list of dataset names along with +#' additional information, such as the amount of resources and the date it was +#' last updated. #' #' @inheritParams get_dataset #' diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index b760e48..b7687d0 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -5,14 +5,14 @@ #' For these it is useful to be able to retrieve the latest #' resource. These data sets include: #' -#' gp-practice-populations -#' gp-practice-contact-details-and-list-size -#' nhsscotland-payments-to-general-practice, -#' dental-practices-and-patient-registrations, -#' general-practitioner-contact-details, -#' prescribed-dispensed, -#' prescriptions-in-the-community, -#' community-pharmacy-contractor-activity +#' * gp-practice-populations +#' * gp-practice-contact-details-and-list-size +#' * nhsscotland-payments-to-general-practice, +#' * dental-practices-and-patient-registrations, +#' * general-practitioner-contact-details, +#' * prescribed-dispensed, +#' * prescriptions-in-the-community, +#' * community-pharmacy-contractor-activity #' #' @param dataset_name name of the dataset as found on #' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform} diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 0d61d3c..49a126f 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -4,19 +4,17 @@ #' two conditions have to be met. It has to appear at the top of #' of the resource list as shown on the open data platform. #' The order they are returned via the api is the same -#' as they appear on the open data platfrom. It also +#' as they appear on the open data platform. It also #' has to have the most recent date created #' #' There are only some datasets that this functionality -#' is relevent to, these are listed within applicable +#' is relevant to, these are listed within applicable #' datasets and are the datasets that keep historic -#' resources instead of over writting them +#' resources instead of over writing them #' #' @inheritParams get_dataset #' #' @return a [string] with the resource id -#' - get_latest_resource_id <- function(dataset_name) { applicable_datasets <- c( "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", diff --git a/man/get_dataset_additional_info.Rd b/man/get_dataset_additional_info.Rd index 675faff..69a5bf3 100644 --- a/man/get_dataset_additional_info.Rd +++ b/man/get_dataset_additional_info.Rd @@ -14,8 +14,9 @@ get_dataset_additional_info(dataset_name) a \link[tibble:tibble-package]{tibble} with the data } \description{ -\code{get_dataset_additional_info()}returns a list of dataset names alongwith additional information, -such as the amount of resources and the date it was last updated +\code{get_dataset_additional_info()} returns a list of dataset names along with +additional information, such as the amount of resources and the date it was +last updated. } \examples{ get_dataset("gp-practice-populations") diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index 0a9058f..09ab71b 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -40,14 +40,16 @@ For these it is useful to be able to retrieve the latest resource. These data sets include: } \details{ -gp-practice-populations -gp-practice-contact-details-and-list-size -nhsscotland-payments-to-general-practice, -dental-practices-and-patient-registrations, -general-practitioner-contact-details, -prescribed-dispensed, -prescriptions-in-the-community, -community-pharmacy-contractor-activity +\itemize{ +\item gp-practice-populations +\item gp-practice-contact-details-and-list-size +\item nhsscotland-payments-to-general-practice, +\item dental-practices-and-patient-registrations, +\item general-practitioner-contact-details, +\item prescribed-dispensed, +\item prescriptions-in-the-community, +\item community-pharmacy-contractor-activity +} } \examples{ dataset_name <- "gp-practice-contact-details-and-list-sizes" diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd index 28944de..96edc6c 100644 --- a/man/get_latest_resource_id.Rd +++ b/man/get_latest_resource_id.Rd @@ -18,12 +18,12 @@ to be confident that the resource returned is the one intended two conditions have to be met. It has to appear at the top of of the resource list as shown on the open data platform. The order they are returned via the api is the same -as they appear on the open data platfrom. It also +as they appear on the open data platform. It also has to have the most recent date created } \details{ There are only some datasets that this functionality -is relevent to, these are listed within applicable +is relevant to, these are listed within applicable datasets and are the datasets that keep historic -resources instead of over writting them +resources instead of over writing them } From b5edfac323b56ae7df5b50c3ea7c6d133079fbf2 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 4 Jul 2024 12:00:00 +0100 Subject: [PATCH 73/93] Update tests/testthat/test-list_datasets.R --- tests/testthat/test-list_datasets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index 0252d7a..341326b 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -2,7 +2,7 @@ test_that("returns more than 0 datasets", { # select the first row of the tibble and get the # number of rows. If no datasets were returned # this will be 0 - expect_equal(nrow(slice(list_datasets(), 1)), 1) + expect_equal(nrow(dplyr::slice(list_datasets(), 1)), 1) }) test_that("returns data in the expected format", { From f5f006b4aec9f9f880a639f5851493b05ec8ac53 Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 5 Jul 2024 15:12:22 +0100 Subject: [PATCH 74/93] remove [ ] within roxygen comment --- R/get_latest_resource_id.R | 2 +- man/get_latest_resource_id.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 49a126f..35cf973 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -14,7 +14,7 @@ #' #' @inheritParams get_dataset #' -#' @return a [string] with the resource id +#' @return a string with the resource id get_latest_resource_id <- function(dataset_name) { applicable_datasets <- c( "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd index 96edc6c..d2d8b95 100644 --- a/man/get_latest_resource_id.Rd +++ b/man/get_latest_resource_id.Rd @@ -11,7 +11,7 @@ get_latest_resource_id(dataset_name) \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} } \value{ -a \link{string} with the resource id +a string with the resource id } \description{ to be confident that the resource returned is the one intended From 4b5c0668344638540b5061fdb2be2bbcfe0a4108 Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 5 Jul 2024 15:25:19 +0100 Subject: [PATCH 75/93] update documentation --- R/get_dataset_additional_info.R | 7 ++++--- man/get_dataset_additional_info.Rd | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index ebf5124..7d76031 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -1,8 +1,9 @@ #' get a datasets additional info #' -#' `get_dataset_additional_info()` returns a list of dataset names along with -#' additional information, such as the amount of resources and the date it was -#' last updated. +#' `get_dataset_additional_info()` returns a tibble of dataset names along with +#' the amount of resources it has and the date it was last updated.Last updated +#' is taken to mean the most recent date a resource within the dataset was +#' created or modified. #' #' @inheritParams get_dataset #' diff --git a/man/get_dataset_additional_info.Rd b/man/get_dataset_additional_info.Rd index 69a5bf3..ca65cb9 100644 --- a/man/get_dataset_additional_info.Rd +++ b/man/get_dataset_additional_info.Rd @@ -14,9 +14,10 @@ get_dataset_additional_info(dataset_name) a \link[tibble:tibble-package]{tibble} with the data } \description{ -\code{get_dataset_additional_info()} returns a list of dataset names along with -additional information, such as the amount of resources and the date it was -last updated. +\code{get_dataset_additional_info()} returns a tibble of dataset names along with +the amount of resources it has and the date it was last updated.Last updated +is taken to mean the most recent date a resource within the dataset was +created or modified. } \examples{ get_dataset("gp-practice-populations") From 1c512b961aec229f1d4740b0234f6101c09d299d Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 5 Jul 2024 15:40:11 +0100 Subject: [PATCH 76/93] update documentation --- R/get_latest_resource.R | 16 +++++++++------- man/get_latest_resource.Rd | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index b7687d0..f03aec2 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -1,17 +1,19 @@ #' Get the latest resource from a data set #' +#' `get_dataset_additional_info()` returns the most +#' recently uploaded resource to a dataset +#' #' There are some datasets on the open data platform that #' keep historic resources instead of updating existing ones. #' For these it is useful to be able to retrieve the latest -#' resource. These data sets include: -#' +#' resource. As of 5.7.2024 these data sets include: #' * gp-practice-populations #' * gp-practice-contact-details-and-list-size -#' * nhsscotland-payments-to-general-practice, -#' * dental-practices-and-patient-registrations, -#' * general-practitioner-contact-details, -#' * prescribed-dispensed, -#' * prescriptions-in-the-community, +#' * nhsscotland-payments-to-general-practice +#' * dental-practices-and-patient-registrations +#' * general-practitioner-contact-details +#' * prescribed-dispensed +#' * prescriptions-in-the-community #' * community-pharmacy-contractor-activity #' #' @param dataset_name name of the dataset as found on diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index 09ab71b..de3d496 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -34,20 +34,22 @@ resource name, the creation date and the last modified/updated date.} a \link[tibble:tibble-package]{tibble} with the data } \description{ +\code{get_dataset_additional_info()} returns the most +recently uploaded resource to a dataset +} +\details{ There are some datasets on the open data platform that keep historic resources instead of updating existing ones. For these it is useful to be able to retrieve the latest -resource. These data sets include: -} -\details{ +resource. As of 5.7.2024 these data sets include: \itemize{ \item gp-practice-populations \item gp-practice-contact-details-and-list-size -\item nhsscotland-payments-to-general-practice, -\item dental-practices-and-patient-registrations, -\item general-practitioner-contact-details, -\item prescribed-dispensed, -\item prescriptions-in-the-community, +\item nhsscotland-payments-to-general-practice +\item dental-practices-and-patient-registrations +\item general-practitioner-contact-details +\item prescribed-dispensed +\item prescriptions-in-the-community \item community-pharmacy-contractor-activity } } From 4bbbbefb18e376723fd6f6b7ffc4b1c772a9c949 Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 5 Jul 2024 16:02:56 +0100 Subject: [PATCH 77/93] update documentation --- R/get_latest_resource.R | 2 +- R/get_latest_resource_id.R | 6 ++++-- man/get_latest_resource.Rd | 2 +- man/get_latest_resource_id.Rd | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index f03aec2..defbf8b 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -8,7 +8,7 @@ #' For these it is useful to be able to retrieve the latest #' resource. As of 5.7.2024 these data sets include: #' * gp-practice-populations -#' * gp-practice-contact-details-and-list-size +#' * gp-practice-contact-details-and-list-sizes #' * nhsscotland-payments-to-general-practice #' * dental-practices-and-patient-registrations #' * general-practitioner-contact-details diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 35cf973..f693810 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -10,7 +10,7 @@ #' There are only some datasets that this functionality #' is relevant to, these are listed within applicable #' datasets and are the datasets that keep historic -#' resources instead of over writing them +#' resources instead of over writing them. #' #' @inheritParams get_dataset #' @@ -38,7 +38,9 @@ get_latest_resource_id <- function(dataset_name) { # throw error if not if (!dataset_name %in% applicable_datasets) { cli::cli_abort(c( - "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets", + "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. + These are:\n + {.var {applicable_datasets}}", "x" = "Please see get_latest_reource documentation.", "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index de3d496..26b2202 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -44,7 +44,7 @@ For these it is useful to be able to retrieve the latest resource. As of 5.7.2024 these data sets include: \itemize{ \item gp-practice-populations -\item gp-practice-contact-details-and-list-size +\item gp-practice-contact-details-and-list-sizes \item nhsscotland-payments-to-general-practice \item dental-practices-and-patient-registrations \item general-practitioner-contact-details diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd index d2d8b95..cea2d26 100644 --- a/man/get_latest_resource_id.Rd +++ b/man/get_latest_resource_id.Rd @@ -25,5 +25,6 @@ has to have the most recent date created There are only some datasets that this functionality is relevant to, these are listed within applicable datasets and are the datasets that keep historic -resources instead of over writing them +resources instead of over writing them. These data +sets are listed within the error message. } From e2e9cbd0f35c32a3990ce6df630b4d78c00cf695 Mon Sep 17 00:00:00 2001 From: ross hull Date: Fri, 5 Jul 2024 16:22:09 +0100 Subject: [PATCH 78/93] add tests --- tests/testthat/test-get_dataset_additional_info.R | 7 +++++++ tests/testthat/test-get_latest_resource.R | 10 ++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/testthat/test-get_dataset_additional_info.R create mode 100644 tests/testthat/test-get_latest_resource.R diff --git a/tests/testthat/test-get_dataset_additional_info.R b/tests/testthat/test-get_dataset_additional_info.R new file mode 100644 index 0000000..2ef9751 --- /dev/null +++ b/tests/testthat/test-get_dataset_additional_info.R @@ -0,0 +1,7 @@ +test_that("returns data in the expected format", { + expect_s3_class(get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times"), "tbl_df") +}) + +test_that("returned tibble has one row", { + expect_equal(nrow(get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times")), 1) +}) diff --git a/tests/testthat/test-get_latest_resource.R b/tests/testthat/test-get_latest_resource.R new file mode 100644 index 0000000..341326b --- /dev/null +++ b/tests/testthat/test-get_latest_resource.R @@ -0,0 +1,10 @@ +test_that("returns more than 0 datasets", { + # select the first row of the tibble and get the + # number of rows. If no datasets were returned + # this will be 0 + expect_equal(nrow(dplyr::slice(list_datasets(), 1)), 1) +}) + +test_that("returns data in the expected format", { + expect_s3_class(list_datasets(), "tbl_df") +}) From e3b82fe01d74dae4d1fd3e64118a11aafd8e4285 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Fri, 5 Jul 2024 15:24:59 +0000 Subject: [PATCH 79/93] Update documentation --- man/get_latest_resource_id.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd index cea2d26..d79e6ae 100644 --- a/man/get_latest_resource_id.Rd +++ b/man/get_latest_resource_id.Rd @@ -25,6 +25,5 @@ has to have the most recent date created There are only some datasets that this functionality is relevant to, these are listed within applicable datasets and are the datasets that keep historic -resources instead of over writing them. These data -sets are listed within the error message. +resources instead of over writing them. } From 9b01c7530dde9a92c3f6e3712638adab6b5c4a48 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 11 Jul 2024 14:06:07 +0100 Subject: [PATCH 80/93] update test --- tests/testthat/test-get_latest_resource.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-get_latest_resource.R b/tests/testthat/test-get_latest_resource.R index 341326b..00d672e 100644 --- a/tests/testthat/test-get_latest_resource.R +++ b/tests/testthat/test-get_latest_resource.R @@ -2,9 +2,9 @@ test_that("returns more than 0 datasets", { # select the first row of the tibble and get the # number of rows. If no datasets were returned # this will be 0 - expect_equal(nrow(dplyr::slice(list_datasets(), 1)), 1) + expect_equal(nrow(dplyr::slice(get_latest_resource(), 1)), 1) }) test_that("returns data in the expected format", { - expect_s3_class(list_datasets(), "tbl_df") + expect_s3_class(get_latest_resource(), "tbl_df") }) From f8c333adcf7f56d3d9dd29178683e404181aa43f Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:06:39 +0100 Subject: [PATCH 81/93] Update R/get_dataset_additional_info.R Co-authored-by: James McMahon --- R/get_dataset_additional_info.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index 7d76031..b0dc359 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -10,7 +10,7 @@ #' @return a [tibble][tibble::tibble-package] with the data #' @export #' @examples -#' get_dataset("gp-practice-populations") +#' get_dataset_additional_info("gp-practice-populations") get_dataset_additional_info <- function(dataset_name) { # define query From b890d3b6af9739e918039fe640bb8b17e4d8ace1 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Thu, 11 Jul 2024 13:07:49 +0000 Subject: [PATCH 82/93] Update documentation --- man/get_dataset_additional_info.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/get_dataset_additional_info.Rd b/man/get_dataset_additional_info.Rd index ca65cb9..0b856bb 100644 --- a/man/get_dataset_additional_info.Rd +++ b/man/get_dataset_additional_info.Rd @@ -20,5 +20,5 @@ is taken to mean the most recent date a resource within the dataset was created or modified. } \examples{ -get_dataset("gp-practice-populations") +get_dataset_additional_info("gp-practice-populations") } From 2c57c02a1b00cfe750a476625959c6133de63c75 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:11:51 +0100 Subject: [PATCH 83/93] Update R/list_datasets.R Co-authored-by: James McMahon --- R/list_datasets.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/list_datasets.R b/R/list_datasets.R index 70f9d8e..91aa375 100644 --- a/R/list_datasets.R +++ b/R/list_datasets.R @@ -9,9 +9,9 @@ #' head(list_datasets()) list_datasets <- function() { # fetch the data - data_sets <- phs_GET("package_list", "")$result + content <- phs_GET("package_list", "") - data_sets <- tibble::tibble("name" = unlist(data_sets)) + data_sets <- tibble::tibble("name" = unlist(content$result)) return(data_sets) From a842e0efd4b0f5770f1af72076a42a4172c65671 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 11 Jul 2024 14:44:48 +0100 Subject: [PATCH 84/93] fix type --- tests/testthat/test-get_latest_resource.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-get_latest_resource.R b/tests/testthat/test-get_latest_resource.R index 00d672e..be9622f 100644 --- a/tests/testthat/test-get_latest_resource.R +++ b/tests/testthat/test-get_latest_resource.R @@ -2,9 +2,9 @@ test_that("returns more than 0 datasets", { # select the first row of the tibble and get the # number of rows. If no datasets were returned # this will be 0 - expect_equal(nrow(dplyr::slice(get_latest_resource(), 1)), 1) + expect_equal(nrow(dplyr::slice(get_latest_resource("gp-practice-populations"), 1)), 1) }) test_that("returns data in the expected format", { - expect_s3_class(get_latest_resource(), "tbl_df") + expect_s3_class(get_latest_resource("gp-practice-populations"), "tbl_df") }) From 312dea4cdd4cffd9f5891c8c8918b9c7cd3f70d0 Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 11 Jul 2024 14:47:18 +0100 Subject: [PATCH 85/93] add error message to expect_error test --- tests/testthat/test-get_latest_resource_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R index c2e7569..db0cc45 100644 --- a/tests/testthat/test-get_latest_resource_id.R +++ b/tests/testthat/test-get_latest_resource_id.R @@ -3,5 +3,5 @@ test_that("returns data for a dataset that is listed", { }) test_that("returns error for a dataset that is not listed", { - expect_error(get_latest_resource_id("hospital-codes")) + expect_error(get_latest_resource_id("hospital-codes"), "The dataset name supplied `hospital-codes` is not within the applicable datasets") }) From 047266b12f6e069883c8150289d7d6b1405679a3 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 6 Aug 2024 15:37:14 +0100 Subject: [PATCH 86/93] Only run tests if the platform is online --- tests/testthat/test-get_dataset_additional_info.R | 2 ++ tests/testthat/test-list_datasets.R | 2 ++ tests/testthat/test-list_resources.R | 2 ++ 3 files changed, 6 insertions(+) diff --git a/tests/testthat/test-get_dataset_additional_info.R b/tests/testthat/test-get_dataset_additional_info.R index 2ef9751..5041ac5 100644 --- a/tests/testthat/test-get_dataset_additional_info.R +++ b/tests/testthat/test-get_dataset_additional_info.R @@ -1,3 +1,5 @@ +skip_if_offline(host = "www.opendata.nhs.scot") + test_that("returns data in the expected format", { expect_s3_class(get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times"), "tbl_df") }) diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index 341326b..bdef181 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -1,3 +1,5 @@ +skip_if_offline(host = "www.opendata.nhs.scot") + test_that("returns more than 0 datasets", { # select the first row of the tibble and get the # number of rows. If no datasets were returned diff --git a/tests/testthat/test-list_resources.R b/tests/testthat/test-list_resources.R index 5434300..205c303 100644 --- a/tests/testthat/test-list_resources.R +++ b/tests/testthat/test-list_resources.R @@ -1,3 +1,5 @@ +skip_if_offline(host = "www.opendata.nhs.scot") + test_that("returns data in the expected format", { expect_s3_class(list_datasets(), "tbl_df") }) From 3ec642579b01a04f0dad4a985f7e543de5bd4bbc Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 6 Aug 2024 15:44:58 +0100 Subject: [PATCH 87/93] Expand tests for list_resources --- tests/testthat/test-list_resources.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-list_resources.R b/tests/testthat/test-list_resources.R index 205c303..b22110e 100644 --- a/tests/testthat/test-list_resources.R +++ b/tests/testthat/test-list_resources.R @@ -1,5 +1,19 @@ skip_if_offline(host = "www.opendata.nhs.scot") test_that("returns data in the expected format", { - expect_s3_class(list_datasets(), "tbl_df") + data <- list_resources("diagnostic-waiting-times") + + expect_s3_class(data,"tbl_df") + expect_named(data, c("res_id", "name", "created", "last_modified")) + expect_equal(dplyr::n_distinct(data[["res_id"]]), nrow(data)) + expect_equal(dplyr::n_distinct(data[["name"]]), nrow(data)) +}) + +test_that("returns errors properly", { + expect_error(list_resources(), "argument \"dataset_name\" is missing, with no default$") + expect_error(list_resources("bad_name"), "dataset_name must be in dash-case") + expect_error(list_resources("incorrect-name"), "Can't find the dataset name") + expect_error(list_resources("diagnostic-waiting-time"), "diagnostic-waiting-times") }) + + From 295b558af766ac358428ed178ac014c37ce22d9a Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 6 Aug 2024 15:46:34 +0100 Subject: [PATCH 88/93] Simplify test --- tests/testthat/test-list_datasets.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index bdef181..84c7c5d 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -1,10 +1,7 @@ skip_if_offline(host = "www.opendata.nhs.scot") test_that("returns more than 0 datasets", { - # select the first row of the tibble and get the - # number of rows. If no datasets were returned - # this will be 0 - expect_equal(nrow(dplyr::slice(list_datasets(), 1)), 1) + expect_gte(nrow(list_datasets()), 1) }) test_that("returns data in the expected format", { From a5e35f203bf1531b98e7d6dfff832fd36ab1e7c0 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Tue, 6 Aug 2024 15:52:39 +0100 Subject: [PATCH 89/93] Expand list_dataset tests --- tests/testthat/test-list_datasets.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index 84c7c5d..1560fbd 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -5,5 +5,9 @@ test_that("returns more than 0 datasets", { }) test_that("returns data in the expected format", { - expect_s3_class(list_datasets(), "tbl_df") + data <- list_datasets() + + expect_s3_class(data,"tbl_df") + expect_named(data, "name") + expect_equal(dplyr::n_distinct(data[["name"]]), nrow(data)) }) From 7dd04b1961565f3391bf83c100b679c555112ab3 Mon Sep 17 00:00:00 2001 From: Moohan Date: Tue, 6 Aug 2024 14:55:17 +0000 Subject: [PATCH 90/93] Style code (GHA) --- tests/testthat/test-list_datasets.R | 2 +- tests/testthat/test-list_resources.R | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-list_datasets.R b/tests/testthat/test-list_datasets.R index 1560fbd..48f400f 100644 --- a/tests/testthat/test-list_datasets.R +++ b/tests/testthat/test-list_datasets.R @@ -7,7 +7,7 @@ test_that("returns more than 0 datasets", { test_that("returns data in the expected format", { data <- list_datasets() - expect_s3_class(data,"tbl_df") + expect_s3_class(data, "tbl_df") expect_named(data, "name") expect_equal(dplyr::n_distinct(data[["name"]]), nrow(data)) }) diff --git a/tests/testthat/test-list_resources.R b/tests/testthat/test-list_resources.R index b22110e..21caa6e 100644 --- a/tests/testthat/test-list_resources.R +++ b/tests/testthat/test-list_resources.R @@ -3,7 +3,7 @@ skip_if_offline(host = "www.opendata.nhs.scot") test_that("returns data in the expected format", { data <- list_resources("diagnostic-waiting-times") - expect_s3_class(data,"tbl_df") + expect_s3_class(data, "tbl_df") expect_named(data, c("res_id", "name", "created", "last_modified")) expect_equal(dplyr::n_distinct(data[["res_id"]]), nrow(data)) expect_equal(dplyr::n_distinct(data[["name"]]), nrow(data)) @@ -15,5 +15,3 @@ test_that("returns errors properly", { expect_error(list_resources("incorrect-name"), "Can't find the dataset name") expect_error(list_resources("diagnostic-waiting-time"), "diagnostic-waiting-times") }) - - From 84ec2956407119c253f490ddbce777d61b936fb0 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 7 Aug 2024 12:26:25 +0100 Subject: [PATCH 91/93] shorten variable names R/get_dataset_additional_info.R Co-authored-by: James McMahon --- R/get_dataset_additional_info.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_dataset_additional_info.R b/R/get_dataset_additional_info.R index b0dc359..fe539b2 100644 --- a/R/get_dataset_additional_info.R +++ b/R/get_dataset_additional_info.R @@ -35,8 +35,8 @@ get_dataset_additional_info <- function(dataset_name) { # create tibble to return return_value <- tibble::tibble( "name" = dataset_name, - "amount_of_resources" = amount_of_resources, - "most_recent_resource_update" = most_recent_resource_date + "n_resources" = amount_of_resources, + "last_updated" = most_recent_resource_date ) return(return_value) From fb05ec315f5f92b7e4d3d944d9e2b3c898cf903e Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 7 Aug 2024 15:27:32 +0100 Subject: [PATCH 92/93] update tests test-get_dataset_additional_info.R --- tests/testthat/test-get_dataset_additional_info.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-get_dataset_additional_info.R b/tests/testthat/test-get_dataset_additional_info.R index 5041ac5..7cb2623 100644 --- a/tests/testthat/test-get_dataset_additional_info.R +++ b/tests/testthat/test-get_dataset_additional_info.R @@ -1,9 +1,12 @@ skip_if_offline(host = "www.opendata.nhs.scot") test_that("returns data in the expected format", { - expect_s3_class(get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times"), "tbl_df") -}) + dataset <- get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times") -test_that("returned tibble has one row", { - expect_equal(nrow(get_dataset_additional_info("weekly-accident-and-emergency-activity-and-waiting-times")), 1) + expect_s3_class(dataset, "tbl_df") + expect_equal(nrow(dataset), 1) + expect_named(dataset, c("name", "n_resources", "last_updated")) }) + + + From 5ef7b3b299874e63f4b6f35afc6fd2e3870133fc Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 7 Aug 2024 14:45:48 +0000 Subject: [PATCH 93/93] Style code (GHA) --- tests/testthat/test-get_dataset_additional_info.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/testthat/test-get_dataset_additional_info.R b/tests/testthat/test-get_dataset_additional_info.R index 7cb2623..aab8216 100644 --- a/tests/testthat/test-get_dataset_additional_info.R +++ b/tests/testthat/test-get_dataset_additional_info.R @@ -7,6 +7,3 @@ test_that("returns data in the expected format", { expect_equal(nrow(dataset), 1) expect_named(dataset, c("name", "n_resources", "last_updated")) }) - - -