Skip to content

Commit

Permalink
Create tests for social care sandpit extracts (#943)
Browse files Browse the repository at this point in the history
* Update `write_tests_xlsx`

* Update documentation

* Add in sandpit tests where the extract is saved

* Setup tests for sandpit
Further checks needed for writing to disk

* Update documentation

* Amend case_when statement

* rename function to include 'sc'

* Update documentation

* Use `is.null` instead of `missing`

* Update documentation

* Add `year` as a parameter

* Update documentation

* Setup for writing sandpit tests to disk

* Update parameters for sandpit tests

* Update documentation

* Use `process_tests_sc_sandpit`

* Apply styling

* Style code

* update documentation

Co-authored-by: Zihao Li <[email protected]>

* Rename variable sc_id

Co-authored-by: Zihao Li <[email protected]>

* Rename variable

Co-authored-by: Zihao Li <[email protected]>

* Rename variable

Co-authored-by: Zihao Li <[email protected]>

* Update documentation

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8689503990/attempts/1
Accepted in #943 (comment)

Signed-off-by: check-spelling-bot <[email protected]>
on-behalf-of: @check-spelling <[email protected]>

* update spelling

* update spelling expect variant

---------

Signed-off-by: check-spelling-bot <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
Co-authored-by: Zihao Li <[email protected]>
Co-authored-by: Zihao Li <[email protected]>
  • Loading branch information
4 people authored Apr 17, 2024
1 parent a0ac030 commit 2cff80d
Show file tree
Hide file tree
Showing 13 changed files with 228 additions and 15 deletions.
8 changes: 3 additions & 5 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ aut
bedday
birthtime
bodyloc
BOXI
boxi
callr
canx
carehome
Expand Down Expand Up @@ -169,8 +169,6 @@ postcodes
ppas
prac
praccode
prac
praccode
ptypes
purrr
quickstart
Expand Down Expand Up @@ -207,7 +205,7 @@ setkeyv
setnafill
setnames
setorder
siar
Siar
sigfac
simd
slf
Expand Down Expand Up @@ -246,6 +244,6 @@ xlsx
yearstay
yml
yyyyqx
zihao
Zihao
zsav
zstd
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,10 @@ export(process_tests_sc_all_hc_episodes)
export(process_tests_sc_all_sds_episodes)
export(process_tests_sc_client_lookup)
export(process_tests_sc_demographics)
export(process_tests_sc_sandpit)
export(process_tests_sds)
export(produce_episode_file_tests)
export(produce_sc_sandpit_tests)
export(produce_source_extract_tests)
export(produce_test_comparison)
export(read_extract_acute)
Expand Down
144 changes: 144 additions & 0 deletions R/process_tests_sc_sandpit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#' Process tests for the social care sandpit extracts
#'
#' @param type Name of sandpit extract.
#'
#' @return a [tibble][tibble::tibble-package] containing a test comparison.
#' @export
#'
process_tests_sc_sandpit <- function(type = c("at", "hc", "ch", "sds", "demographics", "client"), year = NULL) {
comparison <- produce_test_comparison(
old_data = produce_sc_sandpit_tests(
read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = previous_update())),
type = {{ type }}
),
new_data = produce_sc_sandpit_tests(
read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = latest_update())),
type = {{ type }}
)
)

comparison %>%
write_tests_xlsx(sheet_name = {{ type }}, year = year, workbook_name = "sandpit")

return(comparison)
}


#' Produce tests for social care sandpit extracts.
#'
#' @param data new or old data for testing summary flags
#' (data is from [get_sandpit_extract_path()])
#' @param type Name of sandpit extract.
#'
#' @return a dataframe with a count of each flag
#' from [calculate_measures()]
#' @export
#'
produce_sc_sandpit_tests <- function(data, type = c("demographics", "client", "at", "ch", "hc", "sds")) {
if (type == "demographics") {
missing_tests <- data %>%
dplyr::mutate(
n_missing_chi = is_missing(.data$chi_upi),
n_missing_sc_id = is_missing(.data$social_care_id),
n_missing_dob = is.na(.data$chi_date_of_birth),
n_missing_postcode = is_missing(.data$chi_postcode),
n_missing_gender = is_missing(.data$chi_gender_code)
) %>%
dplyr::select(n_missing_chi:n_missing_gender) %>%
calculate_measures(measure = "sum")

latest_flag_tests <- data %>%
dplyr::filter(!(is.na(.data$chi_upi))) %>%
dplyr::group_by(.data$chi_upi, .data$sending_location) %>%
dplyr::summarise(latest_count = sum(.data$latest_record_flag)) %>%
dplyr::ungroup() %>%
dplyr::mutate(
chi_latest_flag_0 = dplyr::if_else(.data$latest_count == 0, 1, 0),
chi_latest_flag_1 = dplyr::if_else(.data$latest_count == 1, 1, 0),
chi_latest_flag_2 = dplyr::if_else(.data$latest_count == 2, 1, 0),
chi_latest_flag_3 = dplyr::if_else(.data$latest_count == 3, 1, 0),
chi_latest_flag_4 = dplyr::if_else(.data$latest_count == 4, 1, 0),
chi_latest_flag_5 = dplyr::if_else(.data$latest_count == 5, 1, 0),
chi_latest_flag_6 = dplyr::if_else(.data$latest_count == 6, 1, 0),
chi_latest_flag_7 = dplyr::if_else(.data$latest_count == 7, 1, 0),
chi_latest_flag_8 = dplyr::if_else(.data$latest_count == 8, 1, 0),
chi_latest_flag_9 = dplyr::if_else(.data$latest_count == 9, 1, 0),
chi_latest_flag_10 = dplyr::if_else(.data$latest_count == 10, 1, 0)
) %>%
dplyr::select(.data$chi_latest_flag_0:.data$chi_latest_flag_10) %>%
calculate_measures(measure = "sum")

# add a flag for sc ids where there is multiple chi associated
sc_id_multi_chi <- data %>%
dplyr::distinct() %>%
dplyr::filter(!(is.na(.data$chi_upi))) %>%
dplyr::group_by(.data$social_care_id, .data$sending_location) %>%
dplyr::distinct(.data$chi_upi, .keep_all = TRUE) %>%
dplyr::mutate(distinct_chi_count = dplyr::n_distinct(.data$chi_upi)) %>%
dplyr::filter(distinct_chi_count > 1) %>%
dplyr::distinct(.data$social_care_id, .data$sending_location, .keep_all = TRUE) %>%
dplyr::mutate(sc_id_multi_chi = 1) %>%
create_sending_location_test_flags(.data$sending_location) %>%
dplyr::ungroup() %>%
dplyr::rename(
sc_id_multi_chi_Aberdeen_City = Aberdeen_City,
sc_id_multi_chi_Aberdeenshire = Aberdeenshire,
sc_id_multi_chi_Angus = Angus,
sc_id_multi_chi_Argyll_and_Bute = Argyll_and_Bute,
sc_id_multi_chi_City_of_Edinburgh = City_of_Edinburgh,
sc_id_multi_chi_Clackmannanshire = Clackmannanshire,
sc_id_multi_chi_Dumfries_and_Galloway = Dumfries_and_Galloway,
sc_id_multi_chi_Dundee_City = Dundee_City,
sc_id_multi_chi_East_Ayrshire = East_Ayrshire,
sc_id_multi_chi_East_Dunbartonshire = East_Dunbartonshire,
sc_id_multi_chi_East_Lothian = East_Lothian,
sc_id_multi_chi_East_Renfrewshire = East_Renfrewshire,
sc_id_multi_chi_Falkirk = Falkirk,
sc_id_multi_chi_Fife = Fife,
sc_id_multi_chi_Glasgow_City = Glasgow_City,
sc_id_multi_chi_Highland = Highland,
sc_id_multi_chi_Inverclyde = Inverclyde,
sc_id_multi_chi_Midlothian = Midlothian,
sc_id_multi_chi_Moray = Moray,
sc_id_multi_chi_Na_h_Eileanan_Siar = Na_h_Eileanan_Siar,
sc_id_multi_chi_North_Ayrshire = North_Ayrshire,
sc_id_multi_chi_North_Lanarkshire = North_Lanarkshire,
sc_id_multi_chi_Orkney_Islands = Orkney_Islands,
sc_id_multi_chi_Perth_and_Kinross = Perth_and_Kinross,
sc_id_multi_chi_Renfrewshire = Renfrewshire,
sc_id_multi_chi_Scottish_Borders = Scottish_Borders,
sc_id_multi_chi_Shetland_Islands = Shetland_Islands,
sc_id_multi_chi_South_Ayrshire = South_Ayrshire,
sc_id_multi_chi_South_Lanarkshire = South_Lanarkshire,
sc_id_multi_chi_Stirling = Stirling,
sc_id_multi_chi_West_Dunbartonshire = West_Dunbartonshire,
sc_id_multi_chi_West_Lothian = West_Lothian
) %>%
dplyr::select(.data$sc_id_multi_chi, .data$sc_id_multi_chi_Aberdeen_City:.data$sc_id_multi_chi_West_Lothian) %>%
calculate_measures(measure = "sum")

output <- list(
missing_tests,
latest_flag_tests,
sc_id_multi_chi
) %>%
purrr::reduce(dplyr::full_join, by = c("measure", "value"))

return(output)
} else if (type == "client" | type == "at" | type == "ch" |
type == "hc" | type == "sds") {
output <- data %>%
# create test flags
dplyr::mutate(
unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id,
n_missing_sc_id = is_missing(.data$social_care_id)
) %>%
create_sending_location_test_flags(.data$sending_location) %>%
# remove variables that won't be summed
dplyr::select(c("unique_sc_id":"West_Lothian")) %>%
# use function to sum new test flags
calculate_measures(measure = "sum")

return(output)
}
}
3 changes: 3 additions & 0 deletions R/read_lookup_sc_client.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ read_lookup_sc_client <- function(fyyear,
if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) {
client_data %>%
write_file(get_sandpit_extract_path(type = "client", year = fyyear))

client_data %>%
process_tests_sc_sandpit(type = "client", year = fyyear)
} else {
client_data <- client_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_lookup_sc_demographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
sc_demog %>%
write_file(get_sandpit_extract_path(type = "demographics"))

sc_demog %>%
process_tests_sc_sandpit(type = "demographics")
} else {
sc_demog <- sc_demog
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) {
at_full_data %>%
write_file(get_sandpit_extract_path(type = "at"))

at_full_data %>%
process_tests_sandpit(type = "at")
} else {
at_full_data <- at_full_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) {
ch_data %>%
write_file(get_sandpit_extract_path(type = "ch"))

ch_data %>%
process_tests_sandpit(type = "ch")
} else {
ch_data <- ch_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_home_care.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) {
home_care_data %>%
write_file(get_sandpit_extract_path(type = "hc"))

home_care_date %>%
process_tests_sandpit(type = "hc")
} else {
home_care_data <- home_care_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) {
sds_full_data %>%
write_file(get_sandpit_extract_path(type = "sds"))

sds_full_data %>%
process_tests_sandpit(type = "sds")
} else {
sds_full_data <- sds_full_data
}
Expand Down
25 changes: 16 additions & 9 deletions R/write_tests_xlsx.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,22 @@
write_tests_xlsx <- function(comparison_data,
sheet_name,
year = NULL,
workbook_name = c("ep_file", "indiv_file", "lookup", "extract")) {
workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")) {
# Set up the workbook ----

if (workbook_name == "lookup" | missing(year) & workbook_name == "lookup") {
tests_workbook_name <- stringr::str_glue(latest_update(), "_lookups_tests")
} else {
if (is.null(year)) {
tests_workbook_name <- dplyr::case_when(
workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
)
} else if (workbook_name == "sandpit" & !is.null(year)) {
tests_workbook_name <- dplyr::case_when(
workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
)
} else {
tests_workbook_name <- dplyr::case_when(
workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests")
)
}
Expand Down Expand Up @@ -92,11 +99,11 @@ write_tests_xlsx <- function(comparison_data,

date_today <- stringr::str_to_lower(date_today)

sheet_name_dated <- ifelse(
is.null(year),
stringr::str_glue("{sheet_name}_{date_today}"),
stringr::str_glue("{year}_{sheet_name}_{date_today}")
)
if (is.null(year)) {
sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}")
} else {
sheet_name_dated <- stringr::str_glue("{year}_{sheet_name}_{date_today}")
}

# If there has already been a sheet created today, append the time
if (sheet_name_dated %in% names(wb)) {
Expand Down
20 changes: 20 additions & 0 deletions man/process_tests_sc_sandpit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/produce_sc_sandpit_tests.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/write_tests_xlsx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2cff80d

Please sign in to comment.