Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create tests for social care sandpit extracts #943

Merged
merged 29 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d7e7a24
Update `write_tests_xlsx`
Jennit07 Mar 29, 2024
72d3a21
Update documentation
Jennit07 Mar 29, 2024
3c554f0
Add in sandpit tests where the extract is saved
Jennit07 Mar 29, 2024
4747958
Setup tests for sandpit
Jennit07 Apr 3, 2024
690b39f
Update documentation
Jennit07 Apr 3, 2024
1d94a23
Amend case_when statement
Jennit07 Apr 5, 2024
73f4c4b
rename function to include 'sc'
Jennit07 Apr 5, 2024
602ef3f
Update documentation
Jennit07 Apr 5, 2024
39bf602
Use `is.null` instead of `missing`
Jennit07 Apr 8, 2024
b238d4e
Update documentation
Jennit07 Apr 8, 2024
a85521c
Add `year` as a parameter
Jennit07 Apr 8, 2024
939f28c
Update documentation
Jennit07 Apr 8, 2024
910a57b
Setup for writing sandpit tests to disk
Jennit07 Apr 8, 2024
a15cf1b
Update parameters for sandpit tests
Jennit07 Apr 8, 2024
a1be5b1
Update documentation
Jennit07 Apr 8, 2024
0990dd2
Use `process_tests_sc_sandpit`
Jennit07 Apr 9, 2024
bc7ac61
Apply styling
Jennit07 Apr 9, 2024
b0adae3
Style code
Jennit07 Apr 9, 2024
25871fd
Merge branch 'June-24-update' into tests-sandpit
Jennit07 Apr 9, 2024
bdeda4b
Merge branch 'June-24-update' into tests-sandpit
lizihao-anu Apr 15, 2024
29aacb0
update documentation
Jennit07 Apr 15, 2024
ed01b6d
Rename variable sc_id
Jennit07 Apr 15, 2024
510e282
Rename variable
Jennit07 Apr 15, 2024
448df57
Rename variable
Jennit07 Apr 15, 2024
9285f04
Update documentation
Jennit07 Apr 15, 2024
b8d6e84
Merge branch 'June-24-update' into tests-sandpit
lizihao-anu Apr 15, 2024
3648a71
[check-spelling] Update metadata
lizihao-anu Apr 15, 2024
fc9e938
update spelling
lizihao-anu Apr 15, 2024
fbb2f43
update spelling expect variant
lizihao-anu Apr 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ aut
bedday
birthtime
bodyloc
BOXI
boxi
callr
canx
carehome
Expand Down Expand Up @@ -169,8 +169,6 @@ postcodes
ppas
prac
praccode
prac
praccode
ptypes
purrr
quickstart
Expand Down Expand Up @@ -207,7 +205,7 @@ setkeyv
setnafill
setnames
setorder
siar
Siar
sigfac
simd
slf
Expand Down Expand Up @@ -246,6 +244,6 @@ xlsx
yearstay
yml
yyyyqx
zihao
Zihao
zsav
zstd
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,10 @@ export(process_tests_sc_all_hc_episodes)
export(process_tests_sc_all_sds_episodes)
export(process_tests_sc_client_lookup)
export(process_tests_sc_demographics)
export(process_tests_sc_sandpit)
export(process_tests_sds)
export(produce_episode_file_tests)
export(produce_sc_sandpit_tests)
export(produce_source_extract_tests)
export(produce_test_comparison)
export(read_extract_acute)
Expand Down
144 changes: 144 additions & 0 deletions R/process_tests_sc_sandpit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#' Process tests for the social care sandpit extracts
#'
#' @param type Name of sandpit extract.
#'
#' @return a [tibble][tibble::tibble-package] containing a test comparison.
#' @export
#'
process_tests_sc_sandpit <- function(type = c("at", "hc", "ch", "sds", "demographics", "client"), year = NULL) {
comparison <- produce_test_comparison(
old_data = produce_sc_sandpit_tests(
read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = previous_update())),
type = {{ type }}
),
new_data = produce_sc_sandpit_tests(
read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = latest_update())),
type = {{ type }}
)
)

comparison %>%
write_tests_xlsx(sheet_name = {{ type }}, year = year, workbook_name = "sandpit")

return(comparison)
}


#' Produce tests for social care sandpit extracts.
#'
#' @param data new or old data for testing summary flags
#' (data is from [get_sandpit_extract_path()])
#' @param type Name of sandpit extract.
#'
#' @return a dataframe with a count of each flag
#' from [calculate_measures()]
#' @export
#'
produce_sc_sandpit_tests <- function(data, type = c("demographics", "client", "at", "ch", "hc", "sds")) {
if (type == "demographics") {
missing_tests <- data %>%
dplyr::mutate(
n_missing_chi = is_missing(.data$chi_upi),
n_missing_sc_id = is_missing(.data$social_care_id),
n_missing_dob = is.na(.data$chi_date_of_birth),
n_missing_postcode = is_missing(.data$chi_postcode),
n_missing_gender = is_missing(.data$chi_gender_code)
) %>%
dplyr::select(n_missing_chi:n_missing_gender) %>%
calculate_measures(measure = "sum")

latest_flag_tests <- data %>%
dplyr::filter(!(is.na(.data$chi_upi))) %>%
dplyr::group_by(.data$chi_upi, .data$sending_location) %>%
dplyr::summarise(latest_count = sum(.data$latest_record_flag)) %>%
dplyr::ungroup() %>%
dplyr::mutate(
chi_latest_flag_0 = dplyr::if_else(.data$latest_count == 0, 1, 0),
chi_latest_flag_1 = dplyr::if_else(.data$latest_count == 1, 1, 0),
chi_latest_flag_2 = dplyr::if_else(.data$latest_count == 2, 1, 0),
chi_latest_flag_3 = dplyr::if_else(.data$latest_count == 3, 1, 0),
chi_latest_flag_4 = dplyr::if_else(.data$latest_count == 4, 1, 0),
chi_latest_flag_5 = dplyr::if_else(.data$latest_count == 5, 1, 0),
chi_latest_flag_6 = dplyr::if_else(.data$latest_count == 6, 1, 0),
chi_latest_flag_7 = dplyr::if_else(.data$latest_count == 7, 1, 0),
chi_latest_flag_8 = dplyr::if_else(.data$latest_count == 8, 1, 0),
chi_latest_flag_9 = dplyr::if_else(.data$latest_count == 9, 1, 0),
chi_latest_flag_10 = dplyr::if_else(.data$latest_count == 10, 1, 0)
) %>%
dplyr::select(.data$chi_latest_flag_0:.data$chi_latest_flag_10) %>%
calculate_measures(measure = "sum")

# add a flag for sc ids where there is multiple chi associated
sc_id_multi_chi <- data %>%
dplyr::distinct() %>%
dplyr::filter(!(is.na(.data$chi_upi))) %>%
dplyr::group_by(.data$social_care_id, .data$sending_location) %>%
dplyr::distinct(.data$chi_upi, .keep_all = TRUE) %>%
dplyr::mutate(distinct_chi_count = dplyr::n_distinct(.data$chi_upi)) %>%
dplyr::filter(distinct_chi_count > 1) %>%
dplyr::distinct(.data$social_care_id, .data$sending_location, .keep_all = TRUE) %>%
dplyr::mutate(sc_id_multi_chi = 1) %>%
create_sending_location_test_flags(.data$sending_location) %>%
dplyr::ungroup() %>%
dplyr::rename(
sc_id_multi_chi_Aberdeen_City = Aberdeen_City,
sc_id_multi_chi_Aberdeenshire = Aberdeenshire,
sc_id_multi_chi_Angus = Angus,
sc_id_multi_chi_Argyll_and_Bute = Argyll_and_Bute,
sc_id_multi_chi_City_of_Edinburgh = City_of_Edinburgh,
sc_id_multi_chi_Clackmannanshire = Clackmannanshire,
sc_id_multi_chi_Dumfries_and_Galloway = Dumfries_and_Galloway,
sc_id_multi_chi_Dundee_City = Dundee_City,
sc_id_multi_chi_East_Ayrshire = East_Ayrshire,
sc_id_multi_chi_East_Dunbartonshire = East_Dunbartonshire,
sc_id_multi_chi_East_Lothian = East_Lothian,
sc_id_multi_chi_East_Renfrewshire = East_Renfrewshire,
sc_id_multi_chi_Falkirk = Falkirk,
sc_id_multi_chi_Fife = Fife,
sc_id_multi_chi_Glasgow_City = Glasgow_City,
sc_id_multi_chi_Highland = Highland,
sc_id_multi_chi_Inverclyde = Inverclyde,
sc_id_multi_chi_Midlothian = Midlothian,
sc_id_multi_chi_Moray = Moray,
sc_id_multi_chi_Na_h_Eileanan_Siar = Na_h_Eileanan_Siar,
sc_id_multi_chi_North_Ayrshire = North_Ayrshire,
sc_id_multi_chi_North_Lanarkshire = North_Lanarkshire,
sc_id_multi_chi_Orkney_Islands = Orkney_Islands,
sc_id_multi_chi_Perth_and_Kinross = Perth_and_Kinross,
sc_id_multi_chi_Renfrewshire = Renfrewshire,
sc_id_multi_chi_Scottish_Borders = Scottish_Borders,
sc_id_multi_chi_Shetland_Islands = Shetland_Islands,
sc_id_multi_chi_South_Ayrshire = South_Ayrshire,
sc_id_multi_chi_South_Lanarkshire = South_Lanarkshire,
sc_id_multi_chi_Stirling = Stirling,
sc_id_multi_chi_West_Dunbartonshire = West_Dunbartonshire,
sc_id_multi_chi_West_Lothian = West_Lothian
) %>%
dplyr::select(.data$sc_id_multi_chi, .data$sc_id_multi_chi_Aberdeen_City:.data$sc_id_multi_chi_West_Lothian) %>%
calculate_measures(measure = "sum")

output <- list(
missing_tests,
latest_flag_tests,
sc_id_multi_chi
) %>%
purrr::reduce(dplyr::full_join, by = c("measure", "value"))

return(output)
} else if (type == "client" | type == "at" | type == "ch" |
type == "hc" | type == "sds") {
output <- data %>%
# create test flags
dplyr::mutate(
unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id,
n_missing_sc_id = is_missing(.data$social_care_id)
) %>%
create_sending_location_test_flags(.data$sending_location) %>%
# remove variables that won't be summed
dplyr::select(c("unique_sc_id":"West_Lothian")) %>%
# use function to sum new test flags
calculate_measures(measure = "sum")

return(output)
}
}
3 changes: 3 additions & 0 deletions R/read_lookup_sc_client.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ read_lookup_sc_client <- function(fyyear,
if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) {
client_data %>%
write_file(get_sandpit_extract_path(type = "client", year = fyyear))

client_data %>%
process_tests_sc_sandpit(type = "client", year = fyyear)
} else {
client_data <- client_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_lookup_sc_demographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
sc_demog %>%
write_file(get_sandpit_extract_path(type = "demographics"))

sc_demog %>%
process_tests_sc_sandpit(type = "demographics")
} else {
sc_demog <- sc_demog
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) {
at_full_data %>%
write_file(get_sandpit_extract_path(type = "at"))

at_full_data %>%
process_tests_sandpit(type = "at")
} else {
at_full_data <- at_full_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) {
ch_data %>%
write_file(get_sandpit_extract_path(type = "ch"))

ch_data %>%
process_tests_sandpit(type = "ch")
} else {
ch_data <- ch_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_home_care.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) {
home_care_data %>%
write_file(get_sandpit_extract_path(type = "hc"))

home_care_date %>%
process_tests_sandpit(type = "hc")
} else {
home_care_data <- home_care_data
}
Expand Down
3 changes: 3 additions & 0 deletions R/read_sc_all_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) {
sds_full_data %>%
write_file(get_sandpit_extract_path(type = "sds"))

sds_full_data %>%
process_tests_sandpit(type = "sds")
} else {
sds_full_data <- sds_full_data
}
Expand Down
25 changes: 16 additions & 9 deletions R/write_tests_xlsx.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,22 @@
write_tests_xlsx <- function(comparison_data,
sheet_name,
year = NULL,
workbook_name = c("ep_file", "indiv_file", "lookup", "extract")) {
workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")) {
# Set up the workbook ----

if (workbook_name == "lookup" | missing(year) & workbook_name == "lookup") {
tests_workbook_name <- stringr::str_glue(latest_update(), "_lookups_tests")
} else {
if (is.null(year)) {
tests_workbook_name <- dplyr::case_when(
workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
)
} else if (workbook_name == "sandpit" & !is.null(year)) {
tests_workbook_name <- dplyr::case_when(
workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
)
} else {
tests_workbook_name <- dplyr::case_when(
workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests")
)
}
Expand Down Expand Up @@ -92,11 +99,11 @@ write_tests_xlsx <- function(comparison_data,

date_today <- stringr::str_to_lower(date_today)

sheet_name_dated <- ifelse(
is.null(year),
stringr::str_glue("{sheet_name}_{date_today}"),
stringr::str_glue("{year}_{sheet_name}_{date_today}")
)
if (is.null(year)) {
sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}")
} else {
sheet_name_dated <- stringr::str_glue("{year}_{sheet_name}_{date_today}")
}

# If there has already been a sheet created today, append the time
if (sheet_name_dated %in% names(wb)) {
Expand Down
20 changes: 20 additions & 0 deletions man/process_tests_sc_sandpit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/produce_sc_sandpit_tests.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/write_tests_xlsx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading