diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..3d731a0af 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,5 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 + diff --git a/NAMESPACE b/NAMESPACE index 670ed1932..91f6b66d9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,6 +54,7 @@ export(get_nsu_path) export(get_pop_path) export(get_practice_details_path) export(get_readcode_lookup_path) +export(get_sandpit_extract_path) export(get_sc_at_episodes_path) export(get_sc_ch_episodes_path) export(get_sc_client_lookup_path) diff --git a/R/00-update_refs.R b/R/00-update_refs.R index 9d119e74e..2052b938f 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -7,7 +7,7 @@ #' #' @family initialisation latest_update <- function() { - "Dec_2023" + "Mar_2024" } #' Previous update @@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) { #' #' @family initialisation get_dd_period <- function() { - "Jul16_Sep23" + "Jul16_Dec23" } #' The latest financial year for Cost uplift setting diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R index 710324646..519ce3694 100644 --- a/R/add_hri_variables.R +++ b/R/add_hri_variables.R @@ -82,7 +82,7 @@ add_hri_variables <- function( "mh_episodes", "gls_episodes", "op_newcons_attendances", - # op_newcons_dnas, + "op_newcons_dnas", "ae_attendances", "pis_paid_items", "ooh_cases" diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R index 6050b278f..d418ac18c 100644 --- a/R/add_keep_population_flag.R +++ b/R/add_keep_population_flag.R @@ -15,7 +15,7 @@ add_keep_population_flag <- function(individual_file, year) { } else { ## Obtain the population estimates for Locality AgeGroup and Gender. pop_estimates <- - readr::read_rds(get_datazone_pop_path("DataZone2011_pop_est_2011_2021.rds")) %>% + readr::read_rds(get_pop_path(type = "datazone")) %>% dplyr::select(year, datazone2011, sex, age0:age90plus) # Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 8d9dff96d..d207b221a 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -7,7 +7,7 @@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { +aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") # Convert to data.table @@ -89,6 +89,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { "episodes", "beddays", "cost", + "_dnas", "attendances", "attend", "contacts", @@ -109,8 +110,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { vars_start_with( episode_file, "sds_option" - ), - "health_net_cost_inc_dnas" + ) ) cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")] if (exclude_sc_var) { @@ -187,6 +187,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { individual_file_cols5[, chi := NULL], individual_file_cols6[, chi := NULL] ) + individual_file <- individual_file[, year := year] # convert back to tibble return(dplyr::as_tibble(individual_file)) diff --git a/R/calculate_stay.R b/R/calculate_stay.R index ae80b33c1..d1748a470 100644 --- a/R/calculate_stay.R +++ b/R/calculate_stay.R @@ -37,9 +37,6 @@ calculate_stay <- function(year, start_date, end_date, sc_qtr = NULL) { if (anyNA(sc_qtr)) { cli::cli_abort("Some of the submitted quarters are missing") } - # else { - # sc_qtr <- check_quarter_format(sc_qtr) - # } # Set Quarters qtr_end <- lubridate::add_with_rollback( @@ -51,6 +48,7 @@ calculate_stay <- function(year, start_date, end_date, sc_qtr = NULL) { lubridate::period(1L, "days") ) + # check logic here for care home methodology dummy_end_date <- dplyr::case_when( # If end_date is not missing use the end date !is.na(end_date) ~ end_date, diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 51c66e1b0..2197d8c0e 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -46,7 +46,7 @@ check_year_valid <- function( return(FALSE) } else if (year >= "2425" && type %in% "sparra") { return(FALSE) - } else if (year >= "2324" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year >= "2425" && type %in% c("ch", "hc", "sds", "at")) { return(FALSE) } diff --git a/R/create_demog_test_flags.R b/R/create_demog_test_flags.R index 3023292ce..b909679d9 100644 --- a/R/create_demog_test_flags.R +++ b/R/create_demog_test_flags.R @@ -3,19 +3,19 @@ #' @description Create the demographic flags for testing #' #' @param data a dataframe containing demographic variables e.g. chi +#' @param chi Specify chi or anon_chi. #' #' @return a dataframe with flag (1 or 0) for each demographic variable. #' Missing value flag from [is_missing()] #' #' @family flag functions -create_demog_test_flags <- function(data) { +create_demog_test_flags <- function(data, chi = c(chi, anon_chi)) { data %>% - dplyr::arrange(.data$chi) %>% + dplyr::arrange({{ chi }}) %>% # create test flags dplyr::mutate( - valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI", - unique_chi = dplyr::lag(.data$chi) != .data$chi, - n_missing_chi = is_missing(.data$chi), + unique_chi = dplyr::lag({{ chi }}) != {{ chi }}, + n_missing_chi = is_missing({{ chi }}), n_males = .data$gender == 1L, n_females = .data$gender == 2L, n_postcode = !is.na(.data$postcode) | !.data$postcode == "", diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R index 2b252a151..d0e0c9988 100644 --- a/R/create_demographic_lookup.R +++ b/R/create_demographic_lookup.R @@ -344,18 +344,21 @@ assign_d_cohort_high_cc <- function(dementia, liver, cancer, spec) { - high_cc <- + high_cc <- dplyr::case_when( + spec == "G5" ~ TRUE, # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y", # then high_cc_cohort = TRUE # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65 - rowSums(dplyr::pick(c( + (rowSums(dplyr::pick(c( "dementia", "hefailure", "refailure", "liver", "cancer" - )), na.rm = TRUE) >= 1L | - spec == "G5" + )), na.rm = TRUE) >= 1L) ~ TRUE, + .default = FALSE + ) + return(high_cc) } diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 3de9223dd..a9503e83c 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -103,6 +103,8 @@ create_episode_file <- function( "mar_beddays" ) ) %>% + # match on sc client variables + join_sc_client(year, sc_client = sc_client, file_type = "episode") %>% # Check chi is valid using phsmethods function # If the CHI is invalid for whatever reason, set the CHI to NA dplyr::mutate( @@ -135,15 +137,15 @@ create_episode_file <- function( year, slf_deaths_lookup ) %>% - join_sc_client(year, sc_client = sc_client, file_type = "episode") %>% load_ep_file_vars(year) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { episode_file <- episode_file %>% dplyr::mutate( ch_chi_cis = NA, - sc_id_cis = NA, + ch_sc_id_cis = NA, ch_name = NA, + ch_postcode = NA, ch_adm_reason = NA, ch_provider = NA, ch_nursing = NA, @@ -158,7 +160,9 @@ create_episode_file <- function( hc_cost_q4 = NA, hc_provider = NA, hc_reablement = NA, - sds_option_4 = NA, + person_id = NA, + sc_latest_submission = NA, + sc_send_lca = NA, sc_living_alone = NA, sc_support_from_unpaid_carer = NA, sc_social_worker = NA, diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d9316b41b..e5b0fd2fd 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -73,7 +73,7 @@ create_individual_file <- function( ))) %>% remove_blank_chi() %>% add_cij_columns() %>% - add_all_columns() + add_all_columns(year = year) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% @@ -82,7 +82,7 @@ create_individual_file <- function( individual_file <- individual_file %>% aggregate_ch_episodes() %>% clean_up_ch(year) %>% - aggregate_by_chi(exclude_sc_var = FALSE) + aggregate_by_chi(year = year, exclude_sc_var = FALSE) } individual_file <- individual_file %>% @@ -202,7 +202,7 @@ add_cij_columns <- function(episode_file) { #' of prefixed column names created based on some condition. #' @family individual_file #' @inheritParams create_individual_file -add_all_columns <- function(episode_file) { +add_all_columns <- function(episode_file, year) { cli::cli_alert_info("Add all columns function started at {Sys.time()}") episode_file <- episode_file %>% @@ -483,8 +483,10 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_ep_end = dplyr::if_else( eval(condition), .data$record_keydate2, - lubridate::NA_Date_ ), - # If end date is missing use the first day of next FY quarter + lubridate::NA_Date_ + ), + # check logic here for care home methodology + # If end date is missing use the end of the FY quarter ch_ep_end = dplyr::if_else( eval(condition) & is.na(.data$ch_ep_end), start_next_fy_quarter(.data$sc_latest_submission), diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R index 4acbfc507..242e0b351 100644 --- a/R/create_service_use_lookup.R +++ b/R/create_service_use_lookup.R @@ -908,7 +908,13 @@ assign_cohort_names <- function(data) { # Situation where no cost is greater than another, # so the maximum is the same as the mean .data$cost_max == rowSums( - dplyr::pick("psychiatry_cost":"residential_care_cost") + dplyr::pick(c( + "psychiatry_cost", "maternity_cost", "geriatric_cost", + "elective_inpatient_cost", "limited_daycases_cost", + "routine_daycase_cost", "single_emergency_cost", + "multiple_emergency_cost", "prescribing_cost", + "outpatient_cost", "ae2_cost", "residential_care_cost" + )) ) / 12.0 ~ "Unassigned", .data$cost_max == .data$psychiatry_cost ~ "Psychiatry", .data$cost_max == .data$maternity_cost ~ "Maternity", diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R index c636980a6..117acbaab 100644 --- a/R/fix_sc_dates.R +++ b/R/fix_sc_dates.R @@ -9,7 +9,7 @@ #' @return A date vector with replaced end dates fix_sc_start_dates <- function(start_date, period_start) { # Fix sds_start_date is missing by setting start_date to be the start of - # financial year + # financial period start_date <- dplyr::if_else( is.na(start_date), period_start, @@ -30,12 +30,12 @@ fix_sc_start_dates <- function(start_date, period_start) { #' @param period Social care latest submission period. #' #' @return A date vector with replaced end dates -fix_sc_end_dates <- function(start_date, end_date, period) { +fix_sc_end_dates <- function(start_date, end_date, period_end_date) { # Fix sds_end_date is earlier than sds_start_date by setting end_date to be # the end of financial year end_date <- dplyr::if_else( start_date > end_date, - end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"), + period_end_date, end_date ) @@ -57,7 +57,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) { #' @return A date vector with replaced end dates fix_sc_missing_end_dates <- function(end_date, period_end) { # Fix sds_end_date is earlier than sds_start_date by setting end_date to be - # the end of financial year + # the end of financial period end_date <- dplyr::if_else( is.na(end_date), period_end, diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R index cd4c3492c..68ac3266e 100644 --- a/R/get_fy_quarter_dates.R +++ b/R/get_fy_quarter_dates.R @@ -15,8 +15,6 @@ start_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) - cal_quarter_date_unique <- lubridate::yq(quarter_unique) fy_quarter_date_unique <- lubridate::add_with_rollback( @@ -47,8 +45,6 @@ start_fy_quarter <- function(quarter) { end_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) - cal_quarter_date_unique <- lubridate::yq(quarter_unique) fy_quarter_date_unique <- lubridate::add_with_rollback( @@ -80,8 +76,6 @@ end_fy_quarter <- function(quarter) { start_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) - cal_quarter_date_unique <- lubridate::yq(quarter_unique) fy_quarter_date_unique <- lubridate::add_with_rollback( @@ -112,8 +106,6 @@ start_next_fy_quarter <- function(quarter) { end_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) - cal_quarter_date_unique <- lubridate::yq(quarter_unique) fy_quarter_date_unique <- lubridate::add_with_rollback( @@ -128,28 +120,3 @@ end_next_fy_quarter <- function(quarter) { return(end_next_fy_quarter) } - -#' Check quarter format -#' -#' @inheritParams start_fy_quarter -#' -#' @return `quarter` invisibly if no issues were found -#' -#' @family date functions -# check_quarter_format <- function(quarter) { -# stopifnot(typeof(quarter) == "character") -# -# if (any( -# stringr::str_detect(quarter, "^\\d{4}Q[1-4]$", negate = TRUE), -# na.rm = TRUE -# )) { -# cli::cli_abort( -# c("{.var quarter} must be in the format {.val YYYYQx} -# where {.val x} is the quarter number.", -# "v" = "For example {.val 2019Q1}." -# ) -# ) -# } -# -# return(invisible(quarter)) -# } diff --git a/R/get_lookup_paths.R b/R/get_lookup_paths.R index fe35a7d2f..7df5c52e2 100644 --- a/R/get_lookup_paths.R +++ b/R/get_lookup_paths.R @@ -126,7 +126,7 @@ get_pop_path <- function(file_name = NULL, "intzone" ~ stringr::str_glue("IntZone_pop_est_2011_\\d+?\\.{ext}") ) - datazone_pop_path <- get_file_path( + pop_path <- get_file_path( directory = pop_dir, file_name = file_name, ext = ext, diff --git a/R/get_sandpit_extract_path.R b/R/get_sandpit_extract_path.R new file mode 100644 index 000000000..9d8089122 --- /dev/null +++ b/R/get_sandpit_extract_path.R @@ -0,0 +1,39 @@ +#' Sandpit Extract File Path +#' +#' @description Get the file path for sandpit extracts +#' +#' @param update The update month to use, +#' defaults to [latest_update()] +#' +#' @param ... additional arguments passed to [get_file_path()] +#' +#' @return The path to the sandpit extracts as an [fs::path()] +#' @export +#' @family social care sandpit extract paths +#' @seealso [get_file_path()] for the generic function. +get_sandpit_extract_path <- function(type = c( + "at", "ch", "hc", + "sds", "client", "demographics" + ), + year = NULL, + update = latest_update(), ...) { + dir <- fs::path(get_slf_dir(), "Social_care", "Sandpit_Extracts") + + file_name <- dplyr::case_match( + type, + "at" ~ "sandpit_at_extract", + "ch" ~ "sandpit_ch_extract", + "hc" ~ "sandpit_hc_extract", + "sds" ~ "sandpit_sds_extract", + "client" ~ "sandpit_sc_client_extract", + "demographics" ~ "sandpit_sc_demographics_extract" + ) + + if (type == "client") { + sandpit_extract_path <- fs::path(dir, stringr::str_glue("{file_name}_{year}_{update}.parquet")) + } else { + sandpit_extract_path <- fs::path(dir, stringr::str_glue("{file_name}_{update}.parquet")) + } + + return(sandpit_extract_path) +} diff --git a/R/get_sc_episodes_path.R b/R/get_sc_episodes_path.R index 501def708..230b69727 100644 --- a/R/get_sc_episodes_path.R +++ b/R/get_sc_episodes_path.R @@ -13,7 +13,7 @@ #' @seealso [get_file_path()] for the generic function. get_sc_ch_episodes_path <- function(update = latest_update(), ...) { sc_ch_episodes_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), + directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_care_home"), file_name = stringr::str_glue("all_ch_episodes_{update}.parquet"), ... ) @@ -33,7 +33,7 @@ get_sc_ch_episodes_path <- function(update = latest_update(), ...) { #' @seealso [get_file_path()] for the generic function. get_sc_at_episodes_path <- function(update = latest_update(), ...) { sc_at_episodes_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), + directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_alarms_telecare"), file_name = stringr::str_glue("all_at_episodes_{update}.parquet"), ... ) @@ -53,7 +53,7 @@ get_sc_at_episodes_path <- function(update = latest_update(), ...) { #' @seealso [get_file_path()] for the generic function. get_sc_hc_episodes_path <- function(update = latest_update(), ...) { sc_hc_episodes_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), + directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_home_care"), file_name = stringr::str_glue("all_hc_episodes_{update}.parquet"), ... ) @@ -73,7 +73,7 @@ get_sc_hc_episodes_path <- function(update = latest_update(), ...) { #' @seealso [get_file_path()] for the generic function. get_sc_sds_episodes_path <- function(update = latest_update(), ...) { sc_sds_episodes_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), + directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_sds"), file_name = stringr::str_glue("all_sds_episodes_{update}.parquet"), ... ) diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R index 5add38b08..be0fa3eb6 100644 --- a/R/get_sc_lookup_paths.R +++ b/R/get_sc_lookup_paths.R @@ -14,7 +14,7 @@ #' @seealso [get_file_path()] for the generic function. get_sc_demog_lookup_path <- function(update = latest_update(), ...) { sc_demog_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), + directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_demographic_lookup"), file_name = stringr::str_glue("sc_demographics_lookup_{update}.parquet"), ... ) @@ -39,7 +39,7 @@ get_sc_demog_lookup_path <- function(update = latest_update(), ...) { #' @seealso [get_file_path()] for the generic function. get_sc_client_lookup_path <- function(year, update = latest_update(), ...) { sc_client_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care"), + directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_client_lookup"), file_name = stringr::str_glue("sc_client_lookup_{year}_{update}.parquet"), ... ) diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R index 5d97d705f..bca00871d 100644 --- a/R/process_costs_rmd.R +++ b/R/process_costs_rmd.R @@ -52,6 +52,11 @@ process_costs_rmd <- function(file_name) { quiet = TRUE ) + if (fs::file_info(output_file)$user == Sys.getenv("USER")) { + # Set the correct permissions + fs::file_chmod(path = output_file, mode = "660") + } + utils::browseURL(output_file) return(NULL) diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 3211f0fb7..04d7082e7 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -100,6 +100,36 @@ process_extract_homelessness <- function( ) ) ) %>% + dplyr::mutate(property_type_code = as.character(property_type_code)) %>% + dplyr::mutate( + property_type_code = dplyr::case_when( + property_type_code == "1" ~ "1 - Own Property - LA Tenancy", + property_type_code == "2" ~ "2 - Own Property - RSL Tenancy", + property_type_code == "3" ~ "3 - Own Property - private rented tenancy", + property_type_code == "4" ~ "4 - Own Property - tenancy secured through employment/tied house", + property_type_code == "5" ~ "5 - Own Property - owning/buying", + property_type_code == "6" ~ "6 - Parental / family home / relatives", + property_type_code == "7" ~ " 7 - Friends / partners", + property_type_code == "8" ~ "8 - Armed Services Accommodation", + property_type_code == "9" ~ "9 - Prison", + property_type_code == "10" ~ "10 - Hospital", + property_type_code == "11" ~ "11 - Children's residential accommodation (looked after by the local authority)", + property_type_code == "12" ~ "12 - Supported accommodation", + property_type_code == "13" ~ "13 - Hostel (unsupported)", + property_type_code == "14" ~ "14 - Bed & Breakfast", + property_type_code == "15" ~ "15 - Caravan / mobile home", + property_type_code == "16" ~ "16 - Long-term roofless", + property_type_code == "17" ~ "17 - Long-term sofa surfing", + property_type_code == "18" ~ "18 - Other", + property_type_code == "19" ~ "19 - Not known / refused", + property_type_code == "20" ~ "20 - Own property - Shared ownership/Shared equity/ LCHO", + property_type_code == "21" ~ "21 - Lodger", + property_type_code == "22" ~ "22 - Shared Property - Private Rented Sector", + property_type_code == "23" ~ "23 - Shared Property - Local Authority", + property_type_code == "24" ~ "24 - Shared Property - RSL", + TRUE ~ property_type_code + ) + ) %>% dplyr::left_join( la_code_lookup, by = dplyr::join_by("sending_local_authority_code_9" == "CA") diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R index 8c363f547..96adc985e 100644 --- a/R/process_lookup_sc_demographics.R +++ b/R/process_lookup_sc_demographics.R @@ -28,30 +28,46 @@ process_lookup_sc_demographics <- function( dplyr::pull(.data$pc7) - # Data Cleaning --------------------------------------- - + # Fill in missing data and flag latest cases to keep --------------------------------------- sc_demog <- data %>% - dplyr::mutate( - # use chi if upi is NA - upi = dplyr::coalesce(.data$upi, .data$chi_upi), - # check gender code - replace code 99 with 9 - submitted_gender = replace(.data$submitted_gender, .data$submitted_gender == 99L, 9L) + dplyr::rename( + chi = chi_upi, + gender = chi_gender_code, + dob = chi_date_of_birth ) %>% + # fill in missing demographic details + dplyr::arrange(period, social_care_id) %>% + dplyr::group_by(social_care_id, sending_location) %>% + tidyr::fill(chi, .direction = ("updown")) %>% + tidyr::fill(dob, .direction = ("updown")) %>% + tidyr::fill(date_of_death, .direction = ("updown")) %>% + tidyr::fill(gender, .direction = ("updown")) %>% + tidyr::fill(chi_postcode, .direction = ("updown")) %>% + tidyr::fill(submitted_postcode, .direction = ("updown")) %>% + dplyr::ungroup() %>% + # format postcodes using `phsmethods` + dplyr::mutate(dplyr::across(tidyselect::contains("postcode"), ~ phsmethods::format_postcode(.x, format = "pc7"))) # are sc postcodes even used anywhere? + + + # flag unique cases of chi and sc_id, and flag the latest record (sc_demographics latest flag is not accurate) + sc_demog <- sc_demog %>% + dplyr::group_by(chi, sending_location) %>% + dplyr::mutate(latest = dplyr::last(period)) %>% # flag latest period for chi + dplyr::group_by(chi, social_care_id, sending_location) %>% + dplyr::mutate(latest_sc_id = dplyr::last(period)) %>% # flag latest period for social care + dplyr::group_by(chi, sending_location) %>% + dplyr::mutate(last_sc_id = dplyr::last(social_care_id)) %>% dplyr::mutate( - # use CHI sex if available - gender = dplyr::if_else( - is.na(.data$chi_gender_code) | .data$chi_gender_code == 9L, - .data$submitted_gender, - .data$chi_gender_code - ), - # Use CHI DoB if available - dob = dplyr::coalesce(.data$chi_date_of_birth, .data$submitted_date_of_birth) + latest_flag = ifelse((latest == period & last_sc_id == social_care_id) | is.na(chi), 1, 0), + keep = ifelse(latest_sc_id == period, 1, 0) ) %>% - # format postcodes using `phsmethods` - dplyr::mutate(dplyr::across( - tidyselect::contains("postcode"), - ~ phsmethods::format_postcode(.x, format = "pc7") - )) + dplyr::ungroup() + + sc_demog <- sc_demog %>% + dplyr::select(-period, -latest_record_flag, -latest, -last_sc_id, -latest_sc_id) %>% + dplyr::distinct() + + # postcodes --------------------------------------------------------------- # count number of na postcodes na_postcodes <- sc_demog %>% @@ -69,29 +85,32 @@ process_lookup_sc_demographics <- function( ~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA) )) %>% dplyr::select( - "latest_record_flag", - "extract_date", "sending_location", "social_care_id", - "upi", + "chi", "gender", "dob", + "date_of_death", "submitted_postcode", - "chi_postcode" + "chi_postcode", + "keep", + "latest_flag" ) %>% # check if submitted_postcode matches with postcode lookup dplyr::mutate( - valid_pc = .data$submitted_postcode %in% valid_spd_postcodes + valid_pc_submitted = .data$submitted_postcode %in% valid_spd_postcodes, + valid_pc_chi = .data$chi_postcode %in% valid_spd_postcodes ) %>% # use submitted_postcode if valid, otherwise use chi_postcode dplyr::mutate(postcode = dplyr::case_when( - (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode, - (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode + (!is.na(.data$chi_postcode) & .data$valid_pc_chi) ~ .data$chi_postcode, + ((is.na(.data$chi_postcode) | !(.data$valid_pc_chi)) & !(is.na(.data$submitted_postcode)) & .data$valid_pc_submitted) ~ .data$submitted_postcode, + (is.na(.data$submitted_postcode) & !.data$valid_pc_submitted) ~ .data$chi_postcode )) %>% dplyr::mutate(postcode_type = dplyr::case_when( - (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted", - (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi", - (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing" + (postcode == chi_postcode) ~ "chi", + (postcode == submitted_postcode) ~ "submitted", + (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode) | is.na(.data$postcode)) ~ "missing" )) # Check where the postcodes are coming from @@ -102,26 +121,32 @@ process_lookup_sc_demographics <- function( na_replaced_postcodes <- sc_demog %>% dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x))) - sc_demog_lookup <- sc_demog %>% + dplyr::filter(keep == 1) %>% # filter to only keep latest record for sc id and chi + dplyr::select(-postcode_type, -valid_pc_submitted, -valid_pc_chi, -submitted_postcode, -chi_postcode) %>% + dplyr::distinct() %>% # group by sending location and ID - dplyr::group_by(.data$sending_location, .data$social_care_id) %>% + dplyr::group_by(.data$sending_location, .data$chi, .data$social_care_id, .data$latest_flag) %>% # arrange so latest submissions are last dplyr::arrange( .data$sending_location, .data$social_care_id, - .data$latest_record_flag, - .data$extract_date + .data$latest_flag ) %>% # summarise to select the last (non NA) submission dplyr::summarise( - chi = dplyr::last(.data$upi), gender = dplyr::last(.data$gender), dob = dplyr::last(.data$dob), - postcode = dplyr::last(.data$postcode) + postcode = dplyr::last(.data$postcode), + date_of_death = dplyr::last(.data$date_of_death) ) %>% dplyr::ungroup() + # check to make sure all cases of chi are still there + dplyr::n_distinct(sc_demog_lookup$chi) # 524810 + dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404 + + if (write_to_disk) { write_file( sc_demog_lookup, diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index 988d1f3e7..77877d584 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -17,58 +17,86 @@ process_sc_all_alarms_telecare <- function( write_to_disk = TRUE) { # Data Cleaning----------------------------------------------------- - replaced_dates <- data %>% - # If the end date is missing, set this to the end of the period - dplyr::mutate( - service_end_date = fix_sc_missing_end_dates( - .data$service_end_date, - .data$period_end_date - ), - # If the start_date is missing, set this to the start of the period - service_start_date = fix_sc_start_dates( - .data$service_start_date, - .data$period_start_date - ), - # Fix service_end_date if earlier than service_start_date by setting end_date to the end of fy - service_end_date = fix_sc_end_dates( - .data$service_start_date, - .data$service_end_date, - .data$period - ) + # Convert to data.table + data.table::setDT(data) + data.table::setDT(sc_demog_lookup) + + # Fix dates and create new variables + data[ + , + service_end_date := fix_sc_missing_end_dates( + service_end_date, + period_end_date + ) + ] + data[ + , + service_start_date := fix_sc_start_dates( + service_start_date, + period_start_date ) + ] + data[ + , + service_end_date := fix_sc_end_dates( + service_start_date, + service_end_date, + period_end_date + ) + ] - at_full_clean <- replaced_dates %>% - # rename for matching source variables - dplyr::rename( - record_keydate1 = "service_start_date", - record_keydate2 = "service_end_date" - ) %>% - # Include source variables - dplyr::mutate( - recid = "AT", - smrtype = dplyr::case_when( - .data$service_type == 1L ~ "AT-Alarm", - .data$service_type == 2L ~ "AT-Tele" + # Rename columns + data.table::setnames( + data, + old = c("service_start_date", "service_end_date"), + new = c("record_keydate1", "record_keydate2") + ) + + # Additional mutations + data[ + , + c( + "recid", + "smrtype", + "sc_send_lca" + ) := list( + "AT", + data.table::fcase( + service_type == 1L, + "AT-Alarm", + service_type == 2L, + "AT-Tele", + default, + NA_character_ ), - # Create person id variable - person_id = stringr::str_glue("{sending_location}-{social_care_id}"), - # Use function for creating sc send lca variables - sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location) - ) %>% - # Match on demographics data (chi, gender, dob and postcode) - dplyr::left_join( - sc_demog_lookup, - by = c("sending_location", "social_care_id") - ) %>% - # when multiple social_care_id from sending_location for single CHI - # replace social_care_id with latest - replace_sc_id_with_latest() + convert_sc_sending_location_to_lca(sending_location) + ) + ] + + # RIGHT_JOIN with sc_demog_lookup + data <- data[sc_demog_lookup, on = .(sending_location, social_care_id)] + + # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) + data <- replace_sc_id_with_latest(data) - # Deal with episodes which have a package across quarters. - qtr_merge <- at_full_clean %>% - # use as.data.table to change the data format to data.table to accelerate - data.table::as.data.table() %>% + data$person_id <- paste0( + data$sending_location, + "-", + data$social_care_id + ) + + # Deal with episodes that have a package across quarters + data[, pkg_count := seq_len(.N), by = .( + sending_location, + social_care_id, + record_keydate1, + smrtype, + period + )] + + # Order data before summarizing + data <- data %>% dplyr::group_by( .data$sending_location, .data$social_care_id, @@ -76,38 +104,33 @@ process_sc_all_alarms_telecare <- function( .data$smrtype, .data$period ) %>% - # Create a count for the package number across episodes - dplyr::mutate(pkg_count = dplyr::row_number()) %>% # Sort prior to merging dplyr::arrange(.by_group = TRUE) %>% - # group for merging episodes - dplyr::group_by( - .data$sending_location, - .data$social_care_id, - .data$record_keydate1, - .data$smrtype, - .data$pkg_count - ) %>% - # merge episodes with packages across quarters - # drop variables not needed - dplyr::summarise( - sending_location = dplyr::last(.data$sending_location), - social_care_id = dplyr::last(.data$social_care_id), - sc_latest_submission = dplyr::last(.data$period), - record_keydate1 = dplyr::last(.data$record_keydate1), - record_keydate2 = dplyr::last(.data$record_keydate2), - smrtype = dplyr::last(.data$smrtype), - pkg_count = dplyr::last(.data$pkg_count), - chi = dplyr::last(.data$chi), - gender = dplyr::last(.data$gender), - dob = dplyr::last(.data$dob), - postcode = dplyr::last(.data$postcode), - recid = dplyr::last(.data$recid), - person_id = dplyr::last(.data$person_id), - sc_send_lca = dplyr::last(.data$sc_send_lca) - ) %>% - # change the data format from data.table to data.frame - tibble::as_tibble() + dplyr::ungroup() %>% + data.table::as.data.table() + + # Summarize to merge episodes + qtr_merge <- data[, .( + sc_latest_submission = data.table::last(period), + record_keydate2 = data.table::last(record_keydate2), + chi = data.table::last(chi), + gender = data.table::last(gender), + dob = data.table::last(dob), + postcode = data.table::last(postcode), + recid = data.table::last(recid), + person_id = data.table::last(person_id), + sc_send_lca = data.table::last(sc_send_lca) + ), by = .( + sending_location, + social_care_id, + record_keydate1, + smrtype, + pkg_count + )] + + # Convert back to data.frame if necessary + qtr_merge <- as.data.frame(qtr_merge) + if (write_to_disk) { write_file( diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index bc3d3bdfc..3ada9a2da 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -16,6 +16,7 @@ process_sc_all_home_care <- function( sc_demog_lookup, write_to_disk = TRUE) { replaced_dates <- data %>% + dplyr::filter(.data$hc_start_date_after_period_end_date != 1) %>% dplyr::mutate( hc_service_end_date = fix_sc_missing_end_dates( .data$hc_service_end_date, @@ -28,7 +29,7 @@ process_sc_all_home_care <- function( hc_service_end_date = fix_sc_end_dates( .data$hc_service_start_date, .data$hc_service_end_date, - .data$period + .data$hc_period_end_date ) ) diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index f9ca52f24..a1a1db24a 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -16,122 +16,166 @@ process_sc_all_sds <- function( write_to_disk = TRUE) { # Match on demographics data (chi, gender, dob and postcode) matched_sds_data <- data %>% - dplyr::left_join( + dplyr::filter(.data$sds_start_date_after_period_end_date != 1) %>% + dplyr::right_join( sc_demog_lookup, by = c("sending_location", "social_care_id") ) %>% # when multiple social_care_id from sending_location for single CHI # replace social_care_id with latest - replace_sc_id_with_latest() - - # Data Cleaning --------------------------------------- - sds_full_clean <- matched_sds_data %>% - # Deal with SDS option 4 - # First turn the option flags into a logical T/F - dplyr::mutate(dplyr::across( - tidyselect::starts_with("sds_option_"), - ~ dplyr::case_when( - .x == 1L ~ TRUE, - .x == 0L ~ FALSE, - is.na(.x) ~ FALSE - ) - )) %>% - # SDS option 4 is derived when a person receives more than one option. - # e.g. if a person has options 1 and 2 then option 4 will be derived - dplyr::mutate( - sds_option_4 = rowSums( - dplyr::pick(tidyselect::starts_with("sds_option_")) - ) > 1L, - .after = .data$sds_option_3 - ) %>% - # If SDS start date is missing, assign start of FY - dplyr::mutate( - sds_start_date = fix_sc_start_dates( - .data$sds_start_date, - .data$sds_period_start_date - ), - # If SDS end date is missing, assign end of FY - sds_end_date = fix_sc_missing_end_dates( - .data$sds_end_date, - .data$sds_period_end_date - ), - # Fix sds_end_date is earlier than sds_start_date by setting end_date to be the end of fyear - sds_end_date = fix_sc_end_dates( - .data$sds_start_date, - .data$sds_end_date, - .data$period - ) - ) %>% - # rename for matching source variables - dplyr::rename( - record_keydate1 = .data$sds_start_date, - record_keydate2 = .data$sds_end_date - ) %>% - # Pivot longer on sds option variables - tidyr::pivot_longer( - cols = tidyselect::contains("sds_option_"), - names_to = "sds_option", - names_prefix = "sds_option_", - names_transform = list(sds_option = ~ paste0("SDS-", .x)), - values_to = "received" - ) %>% - # Only keep rows where they received a package and remove duplicates - dplyr::filter(.data$received) %>% + replace_sc_id_with_latest() %>% + dplyr::select(-sds_start_date_after_period_end_date) %>% dplyr::distinct() %>% - # Include source variables + # sds_options may contain only a few NA, replace NA by 0 dplyr::mutate( - smrtype = dplyr::case_when( - sds_option == "SDS-1" ~ "SDS-1", - sds_option == "SDS-2" ~ "SDS-2", - sds_option == "SDS-3" ~ "SDS-3", - sds_option == "SDS-4" ~ "SDS-4" - ), - recid = "SDS", - # Create person id variable - person_id = stringr::str_glue("{sending_location}-{social_care_id}"), - # Use function for creating sc send lca variables - sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location) + sds_option_1 = tidyr::replace_na(sds_option_1, 0), + sds_option_2 = tidyr::replace_na(sds_option_2, 0), + sds_option_3 = tidyr::replace_na(sds_option_3, 0) ) - final_data <- sds_full_clean %>% - # use as.data.table to change the data format to data.table to accelerate - data.table::as.data.table() %>% - dplyr::group_by( - .data$sending_location, - .data$social_care_id, - .data$smrtype - ) %>% - dplyr::arrange(.data$period, - .data$record_keydate1, - .by_group = TRUE - ) %>% - # Create a flag for episodes that are going to be merged - # Create an episode counter - dplyr::mutate( - distinct_episode = (.data$record_keydate1 > dplyr::lag(.data$record_keydate2)) %>% - tidyr::replace_na(TRUE), - episode_counter = cumsum(.data$distinct_episode) - ) %>% - # Group by episode counter and merge episodes - dplyr::group_by(.data$episode_counter, .add = TRUE) %>% - dplyr::summarise( - sc_latest_submission = dplyr::last(.data$period), - record_keydate1 = min(.data$record_keydate1), - record_keydate2 = max(.data$record_keydate2), - sending_location = dplyr::last(.data$sending_location), - social_care_id = dplyr::last(.data$social_care_id), - chi = dplyr::last(.data$chi), - gender = dplyr::last(.data$gender), - dob = dplyr::last(.data$dob), - postcode = dplyr::last(.data$postcode), - recid = dplyr::last(.data$recid), - person_id = dplyr::last(.data$person_id), - sc_send_lca = dplyr::last(.data$sc_send_lca) - ) %>% - dplyr::ungroup() %>% - dplyr::select(-.data$episode_counter) %>% - # change the data format from data.table to data.frame - tibble::as_tibble() + # Data Cleaning --------------------------------------- + # Convert matched_sds_data to data.table + sds_full_clean <- data.table::as.data.table(matched_sds_data) + rm(matched_sds_data) + + # Deal with SDS option 4 + # Convert option flags into logical T/F + cols_sds_option <- grep( + "^sds_option_", + names(sds_full_clean), + value = TRUE + ) + sds_full_clean[, (cols_sds_option) := lapply(.SD, function(x) { + data.table::fifelse(x == 1L, TRUE, FALSE) + }), + .SDcols = cols_sds_option + ] + + # Derived SDS option 4 when a person receives more than one option + sds_full_clean[, + sds_option_4 := rowSums(.SD) > 1L, + .SDcols = cols_sds_option + ] + + # If SDS start date or end date is missing, assign start/end of FY + sds_full_clean[ + , + sds_start_date := fix_sc_start_dates(sds_start_date, sds_period_start_date) + ] + sds_full_clean[ + , + sds_end_date := fix_sc_missing_end_dates(sds_end_date, sds_period_end_date) + ] + sds_full_clean[ + , + sds_end_date := fix_sc_end_dates(sds_start_date, sds_end_date, sds_period_end_date) + ] + + sds_full_clean[, c( + "sds_period_start_date", + "sds_period_end_date", + "sds_start_date_after_end_date" + ) := NULL] + + # Rename for matching source variables + data.table::setnames( + sds_full_clean, + c("sds_start_date", "sds_end_date"), + c("record_keydate1", "record_keydate2") + ) + + sds_full_clean <- unique(sds_full_clean) + + cols_sds_option <- grep( + "^sds_option_", + names(sds_full_clean), + value = TRUE + ) + # Pivot longer on sds option variables + sds_full_clean_long <- data.table::melt( + sds_full_clean, + id.vars = setdiff(names(sds_full_clean), cols_sds_option), + measure.vars = cols_sds_option, + variable.name = "sds_option", + value.name = "received" + ) + rm(sds_full_clean) + sds_full_clean_long <- sds_full_clean_long[received == TRUE, ] + sds_full_clean_long[ + , + sds_option := paste0("SDS-", sub("sds_option_", "", sds_option)) + ] + + # Filter rows where they received a package and remove duplicates + sds_full_clean_long <- unique(sds_full_clean_long) + + # Include source variables + sds_full_clean_long[, c( + "smrtype", + "recid", + "sc_send_lca" + ) := + list( + sds_option, + "SDS", + convert_sc_sending_location_to_lca(sending_location) + )] + sds_full_clean_long$person_id <- paste0( + sds_full_clean_long$sending_location, + "-", + sds_full_clean_long$social_care_id + ) + + # Group, arrange and create flags for episodes + sds_full_clean_long[, + c( + "period_rank", + "record_keydate1_rank", + "record_keydate2_rank" + ) := list( + rank(period), + rank(record_keydate1), + rank(record_keydate2) + ), + by = .(sending_location, social_care_id, smrtype) + ] + data.table::setorder( + sds_full_clean_long, + period_rank, + record_keydate1_rank, + record_keydate2_rank + ) + + sds_full_clean_long[, + distinct_episode := + (data.table::shift(record_keydate2, type = "lag") < record_keydate1) %>% + tidyr::replace_na(TRUE), + by = .(sending_location, social_care_id, smrtype) + ] + + sds_full_clean_long[, + episode_counter := cumsum(distinct_episode), + by = .(sending_location, social_care_id, smrtype) + ] + + # Merge episodes by episode counter + final_data <- sds_full_clean_long[, .( + sc_latest_submission = data.table::last(period), + record_keydate1 = min(record_keydate1), + record_keydate2 = max(record_keydate2), + chi = data.table::last(chi), + gender = data.table::last(gender), + dob = data.table::last(dob), + postcode = data.table::last(postcode), + recid = data.table::last(recid), + person_id = data.table::last(person_id), + sc_send_lca = data.table::last(sc_send_lca) + ), by = .(sending_location, social_care_id, smrtype, episode_counter)] + rm(sds_full_clean_long) + + # Drop episode_counter and convert back to data.frame if needed + final_data <- as.data.frame(final_data[, -"episode_counter"]) + # final_data now holds the processed data in the format of a data.frame if (write_to_disk) { write_file( diff --git a/R/process_tests_acute.R b/R/process_tests_acute.R index 759d866b7..3e01a463a 100644 --- a/R/process_tests_acute.R +++ b/R/process_tests_acute.R @@ -1,7 +1,7 @@ #' Process Acute tests #' #' @description Takes the processed Acute extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @param data a [tibble][tibble::tibble-package] of the processed data extract. #' @param year the financial year of the extract in the format '1718'. @@ -18,7 +18,7 @@ process_tests_acute <- function(data, year) { old_data = produce_source_extract_tests(old_data), new_data = produce_source_extract_tests(data) ) %>% - write_tests_xlsx(sheet_name = "01B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "01b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_ae.R b/R/process_tests_ae.R index 5bcd6a3c9..802cc8c6c 100644 --- a/R/process_tests_ae.R +++ b/R/process_tests_ae.R @@ -1,7 +1,7 @@ #' Process A&E tests #' #' @description This script takes the processed A&E extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -21,7 +21,7 @@ process_tests_ae <- function(data, year) { max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") ) ) %>% - write_tests_xlsx(sheet_name = "AE2", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "ae2", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R index d7f9fa699..52daef496 100644 --- a/R/process_tests_alarms_telecare.R +++ b/R/process_tests_alarms_telecare.R @@ -18,7 +18,7 @@ process_tests_alarms_telecare <- function(data, year) { ) comparison %>% - write_tests_xlsx(sheet_name = "AT", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "at", year, workbook_name = "extract") return(comparison) } @@ -37,14 +37,14 @@ produce_source_at_tests <- function(data, max_min_vars = c("record_keydate1", "record_keydate2")) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_at_alarms = .data$smrtype == "AT-Alarm", n_at_telecare = .data$smrtype == "AT-Tele" ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # remove variables that won't be summed - dplyr::select(.data$valid_chi:.data$West_Lothian) %>% + dplyr::select(.data$unique_chi:.data$West_Lothian) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R index 2032c2473..21ef3e5c9 100644 --- a/R/process_tests_care_home.R +++ b/R/process_tests_care_home.R @@ -1,7 +1,7 @@ #' Process Care Home tests #' #' @description This script takes the processed Care home extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -15,7 +15,7 @@ process_tests_care_home <- function(data, year) { old_data = produce_source_ch_tests(old_data), new_data = produce_source_ch_tests(data) ) %>% - write_tests_xlsx(sheet_name = "CH", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "ch", year, workbook_name = "extract") return(comparison) } @@ -47,7 +47,7 @@ produce_source_ch_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_episodes = 1L, ch_name_missing = is.na(.data$ch_name), @@ -60,7 +60,7 @@ produce_source_ch_tests <- function(data, ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R index 09a17bdbb..dde710c00 100644 --- a/R/process_tests_cmh.R +++ b/R/process_tests_cmh.R @@ -1,7 +1,7 @@ #' Process CMH tests #' #' @description This script takes the processed CMH extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -20,7 +20,7 @@ process_tests_cmh <- function(data, year) { old_data = produce_source_cmh_tests(old_data), new_data = produce_source_cmh_tests(data) ) %>% - write_tests_xlsx(sheet_name = "CMH", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "cmh", year, workbook_name = "extract") return(comparison) } @@ -43,11 +43,11 @@ process_tests_cmh <- function(data, year) { produce_source_cmh_tests <- function(data) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_hb_test_flags(hb_var = .data$hbrescode) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_delayed_discharges.R b/R/process_tests_delayed_discharges.R index c2370eb76..0355ad0e2 100644 --- a/R/process_tests_delayed_discharges.R +++ b/R/process_tests_delayed_discharges.R @@ -1,7 +1,7 @@ #' Process Delayed Discharges tests #' #' @description Takes the processed Delayed Discharges extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @param data a [tibble][tibble::tibble-package] of the processed data extract. #' @param year the financial year of the extract in the format '1718'. @@ -18,7 +18,7 @@ process_tests_delayed_discharges <- function(data, year) { old_data = produce_source_dd_tests(old_data), new_data = produce_source_dd_tests(data) ) %>% - write_tests_xlsx(sheet_name = "DD", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "dd", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R index d3d55a15a..b354fde09 100644 --- a/R/process_tests_district_nursing.R +++ b/R/process_tests_district_nursing.R @@ -13,14 +13,7 @@ process_tests_district_nursing <- function(data, year) { return(data) } - old_data <- get_existing_data_for_tests(data) %>% - # TODO: remove this bit after SPSS stopped - # replace NA by 0 in monthly costs - dplyr::mutate(dplyr::across( - dplyr::ends_with("_cost"), - ~ tidyr::replace_na(.x, 0.0) - )) - + old_data <- get_existing_data_for_tests(data) data <- rename_hscp(data) comparison <- produce_test_comparison( @@ -65,11 +58,11 @@ produce_source_dn_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% # keep variables for comparison - dplyr::select(.data$valid_chi:.data$NHS_Lanarkshire_cost) %>% + dplyr::select(.data$unique_chi:.data$NHS_Lanarkshire_cost) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index eaa946e3e..6f2c73fcb 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -1,7 +1,7 @@ #' Process Episode file tests #' #' @description Takes the processed episode file and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -73,15 +73,7 @@ produce_episode_file_tests <- function( test_flags <- data %>% dplyr::group_by(.data$recid) %>% # use functions to create HB and partnership flags - dplyr::mutate( - unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi, - n_missing_anon_chi = is_missing(.data$anon_chi), - n_males = .data$gender == 1L, - n_females = .data$gender == 2L, - n_postcode = !is.na(.data$postcode) | !.data$postcode == "", - n_missing_postcode = is_missing(.data$postcode), - missing_dob = is.na(.data$dob) - ) %>% + create_demog_test_flags(chi = anon_chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% create_hscp_test_flags(.data$hscp2018) %>% @@ -111,7 +103,7 @@ produce_episode_file_tests <- function( test_flags <- test_flags %>% # keep variables for comparison - dplyr::select("unique_anon_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum", group_by = "recid") diff --git a/R/process_tests_gp_ooh.R b/R/process_tests_gp_ooh.R index fd3ec5f59..e9778711d 100644 --- a/R/process_tests_gp_ooh.R +++ b/R/process_tests_gp_ooh.R @@ -1,7 +1,7 @@ #' Process GP OOH tests #' #' @description This script takes the processed GP OOH extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -19,7 +19,7 @@ process_tests_gp_ooh <- function(data, year) { sum_mean_vars = "cost" ) ) %>% - write_tests_xlsx(sheet_name = "GPOoH", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "gpooh", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R index c1af63e97..3ac8329e6 100644 --- a/R/process_tests_home_care.R +++ b/R/process_tests_home_care.R @@ -1,7 +1,7 @@ #' Process Home Care tests #' #' @description This script takes the processed Home Care extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -17,7 +17,7 @@ process_tests_home_care <- function(data, year) { ) comparison %>% - write_tests_xlsx(sheet_name = "home_care", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "hc", year, workbook_name = "extract") return(comparison) } @@ -49,7 +49,7 @@ produce_source_hc_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_episodes = 1L, hc_per = dplyr::if_else(.data$smrtype == "HC-Per", 1L, 0L), @@ -61,7 +61,7 @@ produce_source_hc_tests <- function(data, ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R index 4d49f1aa4..e4078d227 100644 --- a/R/process_tests_homelessness.R +++ b/R/process_tests_homelessness.R @@ -16,7 +16,7 @@ process_tests_homelessness <- function(data, year) { old_data = produce_slf_homelessness_tests(old_data), new_data = produce_slf_homelessness_tests(data) ) %>% - write_tests_xlsx(sheet_name = "HL1", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "hl1", year, workbook_name = "extract") return(comparison) } @@ -38,10 +38,10 @@ produce_slf_homelessness_tests <- function(data, test_flags <- data %>% dplyr::arrange(.data$chi) %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_lca_test_flags(.data$hl1_sending_lca) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index bbd13948c..3770d6d26 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -1,7 +1,7 @@ #' Process Individual file tests #' #' @description Takes the processed individual file and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -60,19 +60,11 @@ produce_individual_file_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags - dplyr::mutate( - unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi, - n_missing_anon_chi = is_missing(.data$anon_chi), - n_males = .data$gender == 1L, - n_females = .data$gender == 2L, - n_postcode = !is.na(.data$postcode) | !.data$postcode == "", - n_missing_postcode = is_missing(.data$postcode), - missing_dob = is.na(.data$dob) - ) %>% + create_demog_test_flags(chi = anon_chi) %>% create_hb_test_flags(.data$hbrescode) %>% create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% # keep variables for comparison - dplyr::select(c("unique_anon_chi":dplyr::last_col())) %>% + dplyr::select(c("unique_chi":dplyr::last_col())) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_lookup_gpprac.R b/R/process_tests_lookup_gpprac.R index 453bcaa24..6bfc35356 100644 --- a/R/process_tests_lookup_gpprac.R +++ b/R/process_tests_lookup_gpprac.R @@ -1,7 +1,7 @@ #' Process GP (gpprac) Lookup tests #' #' @description This script takes the processed gpprac lookup and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_lookup_pc #' diff --git a/R/process_tests_lookup_pc.R b/R/process_tests_lookup_pc.R index e018af70b..e0d0aeab8 100644 --- a/R/process_tests_lookup_pc.R +++ b/R/process_tests_lookup_pc.R @@ -5,7 +5,7 @@ #' [previous_update()]. #' #' @description This script takes the processed acute extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @return a [tibble][tibble::tibble-package] containing a test comparison. #' diff --git a/R/process_tests_ltcs.R b/R/process_tests_ltcs.R index 93f35b36d..c667ad745 100644 --- a/R/process_tests_ltcs.R +++ b/R/process_tests_ltcs.R @@ -1,7 +1,7 @@ #' Process LTCs tests #' #' @description This script takes the processed LTCs extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' diff --git a/R/process_tests_maternity.R b/R/process_tests_maternity.R index 90f0ec449..6cc043bcb 100644 --- a/R/process_tests_maternity.R +++ b/R/process_tests_maternity.R @@ -1,7 +1,7 @@ #' Process Maternity tests #' #' @description This script takes the processed homelessness extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -15,7 +15,7 @@ process_tests_maternity <- function(data, year) { old_data = produce_source_extract_tests(old_data), new_data = produce_source_extract_tests(data) ) %>% - write_tests_xlsx(sheet_name = "02B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "02b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_mental_health.R b/R/process_tests_mental_health.R index 96283d47b..2a3c0f026 100644 --- a/R/process_tests_mental_health.R +++ b/R/process_tests_mental_health.R @@ -1,7 +1,7 @@ #' Process Mental Health tests #' #' @description This script takes the processed homelessness extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -15,7 +15,7 @@ process_tests_mental_health <- function(data, year) { old_data = produce_source_extract_tests(old_data), new_data = produce_source_extract_tests(data) ) %>% - write_tests_xlsx(sheet_name = "04B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "04b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R index c1a963dcf..d87fbde7b 100644 --- a/R/process_tests_nrs_deaths.R +++ b/R/process_tests_nrs_deaths.R @@ -1,7 +1,7 @@ #' Process National Records of Scotland (NRS) deaths tests #' #' @description This script takes the processed NRS deaths extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -15,7 +15,7 @@ process_tests_nrs_deaths <- function(data, year) { old_data = produce_source_nrs_tests(old_data), new_data = produce_source_nrs_tests(data) ) %>% - write_tests_xlsx(sheet_name = "NRS", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "nrs", year, workbook_name = "extract") return(comparison) } @@ -38,10 +38,10 @@ process_tests_nrs_deaths <- function(data, year) { produce_source_nrs_tests <- function(data) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate(n_deaths = 1L) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R index 5787e6884..c9a7521b7 100644 --- a/R/process_tests_outpatients.R +++ b/R/process_tests_outpatients.R @@ -1,7 +1,7 @@ #' Process Outpatients tests #' #' @description This script takes the processed outpatients extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -23,7 +23,7 @@ process_tests_outpatients <- function(data, year) { add_hscp_count = FALSE ) ) %>% - write_tests_xlsx(sheet_name = "00B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "00b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R index bac0e3c52..3ad838255 100644 --- a/R/process_tests_prescribing.R +++ b/R/process_tests_prescribing.R @@ -1,7 +1,7 @@ #' Process prescribing tests #' #' @description This script takes the processed prescribing extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -15,7 +15,7 @@ process_tests_prescribing <- function(data, year) { old_data = produce_source_pis_tests(old_data), new_data = produce_source_pis_tests(data) ) %>% - write_tests_xlsx(sheet_name = "PIS", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "pis", year, workbook_name = "extract") return(comparison) } @@ -41,10 +41,10 @@ process_tests_prescribing <- function(data, year) { produce_source_pis_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R index dfb110aa9..b503969ef 100644 --- a/R/process_tests_sc_demographics.R +++ b/R/process_tests_sc_demographics.R @@ -36,7 +36,7 @@ process_tests_sc_demographics <- function(data) { produce_sc_demog_lookup_tests <- function(data) { data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_missing_sending_loc = is.na(.data$sending_location), n_missing_sc_id = is.na(.data$social_care_id) diff --git a/R/process_tests_sds.R b/R/process_tests_sds.R index f624f504b..c972a3a6f 100644 --- a/R/process_tests_sds.R +++ b/R/process_tests_sds.R @@ -1,7 +1,7 @@ #' Process SDS tests #' #' @description This script takes the processed SDS extract and produces -#' a test comparison with the previous data. This is written to disk as a CSV. +#' a test comparison with the previous data. This is written to disk as an xlsx. #' #' @inherit process_tests_acute #' @@ -35,10 +35,10 @@ produce_source_sds_tests <- function(data, max_min_vars = c("record_keydate1", "record_keydate2")) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_lca_test_flags(.data$sc_send_lca) %>% # remove variables that won't be summed - dplyr::select("valid_chi":"West_Lothian") %>% + dplyr::select("unique_chi":"West_Lothian") %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/produce_homelessness_completeness.R b/R/produce_homelessness_completeness.R index 00a459df7..9e157df24 100644 --- a/R/produce_homelessness_completeness.R +++ b/R/produce_homelessness_completeness.R @@ -40,9 +40,9 @@ produce_homelessness_completeness <- function( sg_all_assessments_annual <- openxlsx::read.xlsx( sg_pub_path, - sheet = "Table 1", + sheet = "Table 2", rows = 8L:39L, - cols = 1L:25L, + cols = 1L:29L, colNames = FALSE ) %>% dplyr::rename_with(~ c( @@ -52,7 +52,8 @@ produce_homelessness_completeness <- function( paste0(paste0("q", 1L:4L), "_", rep(2018L, 4L)), paste0(paste0("q", 1L:4L), "_", rep(2019L, 4L)), paste0(paste0("q", 1L:4L), "_", rep(2020L, 4L)), - paste0(paste0("q", 1L:4L), "_", rep(2021L, 4L)) + paste0(paste0("q", 1L:4L), "_", rep(2021L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2022L, 4L)) )) %>% tidyr::pivot_longer( !"CAName", @@ -124,7 +125,7 @@ produce_homelessness_completeness <- function( get_sg_homelessness_pub_path <- function(...) { path <- get_file_path( directory = fs::path(get_slf_dir(), "Homelessness"), - file_name = "2022.09.12 - PHS - Total assessment decisions by LA by Qtr.xlsx", + file_name = "2024.02.07- PHS - Total assessment decisions by LA by Qtr.xlsx", ... ) diff --git a/R/produce_sc_all_episodes_tests.R b/R/produce_sc_all_episodes_tests.R index efe980cd4..4c5f736bb 100644 --- a/R/produce_sc_all_episodes_tests.R +++ b/R/produce_sc_all_episodes_tests.R @@ -10,7 +10,7 @@ produce_sc_all_episodes_tests <- function(data) { data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_missing_sending_loc = dplyr::if_else( is.na(.data$sending_location), @@ -24,7 +24,7 @@ produce_sc_all_episodes_tests <- function(data) { ) ) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select(c("unique_chi":dplyr::last_col())) %>% # use function to sum new test flags calculate_measures(measure = "sum") } diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R index d9a07c893..13b33d549 100644 --- a/R/produce_source_extract_tests.R +++ b/R/produce_source_extract_tests.R @@ -33,7 +33,7 @@ produce_source_extract_tests <- function(data, add_hscp_count = TRUE) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) @@ -43,7 +43,7 @@ produce_source_extract_tests <- function(data, test_flags <- test_flags %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index cc98060f3..370a15722 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -77,5 +77,12 @@ read_lookup_sc_client <- function(fyyear, ) %>% dplyr::collect() + if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) { + client_data %>% + write_file(get_sandpit_extract_path(type = "client", year = fyyear)) + } else { + client_data <- client_data + } + return(client_data) } diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index fcdde5417..020542baa 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -12,27 +12,35 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = ) %>% dplyr::select( "latest_record_flag", - "extract_date", + "period", "sending_location", + "sending_location_name", "social_care_id", - "upi", "chi_upi", - "submitted_postcode", - "chi_postcode", - "submitted_date_of_birth", "chi_date_of_birth", - "submitted_gender", + "date_of_death", + "chi_postcode", + "submitted_postcode", "chi_gender_code" ) %>% - dplyr::collect() %>% + dplyr::collect() + + if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) { + sc_demog %>% + write_file(get_sandpit_extract_path(type = "demographics")) + } else { + sc_demog <- sc_demog + } + + sc_demog <- sc_demog %>% dplyr::mutate( dplyr::across(c( "latest_record_flag", "sending_location", - "submitted_gender", "chi_gender_code" ), as.integer) - ) + ) %>% + dplyr::distinct() return(sc_demog) } diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index 9d4be2be4..5abd9bc7b 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -26,7 +26,16 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection "service_start_date_after_period_end_date" ) %>% dplyr::collect() %>% - dplyr::distinct() %>% + dplyr::distinct() + + if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) { + at_full_data %>% + write_file(get_sandpit_extract_path(type = "at")) + } else { + at_full_data <- at_full_data + } + + at_full_data <- at_full_data %>% dplyr::mutate( period_start_date = dplyr::if_else( .data$period == "2017", diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R index 505222747..870a94ded 100644 --- a/R/read_sc_all_care_home.R +++ b/R/read_sc_all_care_home.R @@ -28,7 +28,16 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn = "age" ) %>% dplyr::collect() %>% - dplyr::distinct() %>% + dplyr::distinct() + + if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) { + ch_data %>% + write_file(get_sandpit_extract_path(type = "ch")) + } else { + ch_data <- ch_data + } + + ch_data <- ch_data %>% # Correct FY 2017 dplyr::mutate(period = dplyr::if_else( .data$period == "2017", diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R index bfccf4428..cca2d0a9b 100644 --- a/R/read_sc_all_home_care.R +++ b/R/read_sc_all_home_care.R @@ -45,7 +45,16 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn = )) %>% # drop rows start date after end date dplyr::collect() %>% - dplyr::distinct() %>% + dplyr::distinct() + + if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) { + home_care_data %>% + write_file(get_sandpit_extract_path(type = "hc")) + } else { + home_care_data <- home_care_data + } + + home_care_data <- home_care_data %>% dplyr::mutate(dplyr::across(c( "sending_location", "financial_year", diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R index 18c5b52ec..d9d5b8b1d 100644 --- a/R/read_sc_all_sds.R +++ b/R/read_sc_all_sds.R @@ -22,19 +22,26 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR "sds_option_1", "sds_option_2", "sds_option_3", - "sds_start_date_after_end_date", - "sds_start_date_after_period_end_date", - "sds_end_date_not_within_period" + "sds_start_date_after_end_date", # get fixed + "sds_start_date_after_period_end_date" # get removed ) %>% dplyr::collect() %>% - dplyr::distinct() %>% + dplyr::distinct() + + if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) { + sds_full_data %>% + write_file(get_sandpit_extract_path(type = "sds")) + } else { + sds_full_data <- sds_full_data + } + + sds_full_data <- sds_full_data %>% dplyr::mutate(dplyr::across(c( "sending_location", "sds_option_1", "sds_option_2", "sds_option_3" - ), as.integer)) %>% - dplyr::filter(.data$sds_start_date_after_period_end_date != 1) + ), as.integer)) return(sds_full_data) } diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 73c1a3706..2c32bbb93 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -7,33 +7,23 @@ replace_sc_id_with_latest <- function(data) { # Check for required variables check_variables_exist( data, - c("sending_location", "social_care_id", "chi", "period") + c("sending_location", "social_care_id", "chi", "latest_flag") ) # select variables we need filter_data <- data %>% dplyr::select( - "sending_location", "social_care_id", "chi", "period" + "sending_location", "social_care_id", "chi", "latest_flag" ) %>% - dplyr::filter(!(is.na(.data$chi))) + dplyr::filter(!(is.na(.data$chi))) %>% + dplyr::distinct() change_sc_id <- filter_data %>% - # Sort (by sending_location, chi and period) for unique chi/sending location - dplyr::arrange( - .data$sending_location, - .data$chi, - dplyr::desc(.data$period) - ) %>% - # Find the latest sc_id for each chi/sending location by keeping latest period - dplyr::distinct( - .data$sending_location, - .data$chi, - .keep_all = TRUE - ) %>% + dplyr::filter(latest_flag == 1) %>% # Rename for latest sc id dplyr::rename(latest_sc_id = "social_care_id") %>% - # drop period for matching - dplyr::select(-"period") + # drop latest_flag for matching + dplyr::select(-"latest_flag") return_data <- change_sc_id %>% # Match back onto data @@ -41,6 +31,7 @@ replace_sc_id_with_latest <- function(data) { by = c("sending_location", "chi"), multiple = "all" ) %>% + dplyr::filter(!(is.na(period))) %>% # Overwrite sc id with the latest dplyr::mutate( social_care_id = dplyr::if_else( diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index c6a962857..ffe86f48f 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -89,6 +89,9 @@ write_tests_xlsx <- function(comparison_data, # add a new sheet for tests date_today <- format(Sys.Date(), "%d_%b") + + date_today <- stringr::str_to_lower(date_today) + sheet_name_dated <- ifelse( is.null(year), stringr::str_glue("{sheet_name}_{date_today}"), diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd index e3c9bba13..59b8353f8 100644 --- a/Rmarkdown/costs_district_nursing.Rmd +++ b/Rmarkdown/costs_district_nursing.Rmd @@ -75,7 +75,7 @@ dn_raw_costs_contacts <- left_join(dn_raw_contacts, # Of the two HSCPs, Argyll and Bute provides the # District Nursing data which is 27% of the population. -population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021.rds")) %>% +population_lookup <- read_file(get_pop_path(type = "hscp")) %>% # Select only the HSCPs for NHS Highland & years since 2015 filter( hscp2019 %in% c("S37000004", "S37000016"), diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 9be2eb9c6..ab75b94d7 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -4,7 +4,8 @@ library(createslf) year <- "1718" processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index 7dec9e5c1..cd5a7435f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -4,7 +4,8 @@ library(createslf) year <- "1819" processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 066bd27b7..a9dc591b1 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -4,7 +4,8 @@ library(createslf) year <- "1920" processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 8354f49ae..37708ee8b 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -4,7 +4,8 @@ library(createslf) year <- "2021" processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 4057770d1..47400e2d1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -4,7 +4,8 @@ library(createslf) year <- "2122" processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 5df7b5db6..e64a57f32 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -4,7 +4,8 @@ library(createslf) year <- "2223" processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index af9a3efe5..4a7f0ad29 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -4,7 +4,8 @@ library(createslf) year <- "2324" processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R index ebc58895f..ac03edd3f 100644 --- a/Run_SLF_Files_targets/run_targets_1718.R +++ b/Run_SLF_Files_targets/run_targets_1718.R @@ -1,4 +1,18 @@ library(targets) + +year <- "1718" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("1718")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R index 83bbcedef..b60728359 100644 --- a/Run_SLF_Files_targets/run_targets_1819.R +++ b/Run_SLF_Files_targets/run_targets_1819.R @@ -1,4 +1,18 @@ library(targets) + +year <- "1819" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("1819")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R index 1640d1900..897ee0b7a 100644 --- a/Run_SLF_Files_targets/run_targets_1920.R +++ b/Run_SLF_Files_targets/run_targets_1920.R @@ -1,4 +1,18 @@ library(targets) + +year <- "1920" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("1920")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R index 80749e81a..53333c014 100644 --- a/Run_SLF_Files_targets/run_targets_2021.R +++ b/Run_SLF_Files_targets/run_targets_2021.R @@ -1,4 +1,18 @@ library(targets) + +year <- "2021" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("2021")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R index aa95d7b24..457fe33e7 100644 --- a/Run_SLF_Files_targets/run_targets_2122.R +++ b/Run_SLF_Files_targets/run_targets_2122.R @@ -1,4 +1,18 @@ library(targets) + +year <- "2122" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("2122")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R index 2ded7d5fd..fc851f3f7 100644 --- a/Run_SLF_Files_targets/run_targets_2223.R +++ b/Run_SLF_Files_targets/run_targets_2223.R @@ -1,4 +1,18 @@ library(targets) + +year <- "2223" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("2223")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R index b875984f4..3b4c9b240 100644 --- a/Run_SLF_Files_targets/run_targets_2324.R +++ b/Run_SLF_Files_targets/run_targets_2324.R @@ -1,4 +1,18 @@ library(targets) + +year <- "2324" + +# use targets for the process until testing episode files tar_make_future( + # it does not recognise `contains(year)` names = (targets::contains("2324")) ) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/_targets.R b/_targets.R index 81adbf7c2..15d2584bb 100644 --- a/_targets.R +++ b/_targets.R @@ -591,24 +591,24 @@ list( data = episode_file, year = year ) - ), - tar_target( - individual_file, - create_individual_file( - episode_file = episode_file, - year = year, - homelessness_lookup = homelessness_lookup, - write_to_disk = write_to_disk - ) - ), - tar_target( - individual_file_tests, - process_tests_individual_file( - data = individual_file, - year = year - ) ) # , # tar_target( + # individual_file, + # create_individual_file( + # episode_file = episode_file, + # year = year, + # homelessness_lookup = homelessness_lookup, + # write_to_disk = write_to_disk + # ) + # ), + # tar_target( + # individual_file_tests, + # process_tests_individual_file( + # data = individual_file, + # year = year + # ) + # ) # , + # tar_target( # episode_file_dataset, # arrow::write_dataset( # dataset = episode_file, diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd index 345a59e01..deb1594b3 100644 --- a/man/add_all_columns.Rd +++ b/man/add_all_columns.Rd @@ -4,10 +4,12 @@ \alias{add_all_columns} \title{Add all columns} \usage{ -add_all_columns(episode_file) +add_all_columns(episode_file, year) } \arguments{ \item{episode_file}{Tibble containing episodic data.} + +\item{year}{The year to process, in FY format.} } \description{ Add new columns based on SMRType and recid which follow a pattern diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd index 84c9c0ad3..16bf7d792 100644 --- a/man/aggregate_by_chi.Rd +++ b/man/aggregate_by_chi.Rd @@ -4,10 +4,12 @@ \alias{aggregate_by_chi} \title{Aggregate by CHI} \usage{ -aggregate_by_chi(episode_file, exclude_sc_var = FALSE) +aggregate_by_chi(episode_file, year, exclude_sc_var = FALSE) } \arguments{ \item{episode_file}{Tibble containing episodic data.} + +\item{year}{The year to process, in FY format.} } \description{ Aggregate episode file by CHI to convert into diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index 43b7bd166..5e9266b10 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -34,16 +34,16 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 142fa4aab..5a50370e0 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -31,16 +31,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index 5511fec84..b67eaa778 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index f786e0319..a09b7b9b9 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/create_demog_test_flags.Rd b/man/create_demog_test_flags.Rd index 589877738..fbc0fadcc 100644 --- a/man/create_demog_test_flags.Rd +++ b/man/create_demog_test_flags.Rd @@ -4,10 +4,12 @@ \alias{create_demog_test_flags} \title{Create demographic test flags} \usage{ -create_demog_test_flags(data) +create_demog_test_flags(data, chi = c(chi, anon_chi)) } \arguments{ \item{data}{a dataframe containing demographic variables e.g. chi} + +\item{chi}{Specify chi or anon_chi.} } \value{ a dataframe with flag (1 or 0) for each demographic variable. diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 2925ffe60..6220f5f32 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -34,8 +34,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 0efe9624a..26c439a04 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -33,8 +33,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index f9cc1720a..702446e82 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/fix_sc_end_dates.Rd b/man/fix_sc_end_dates.Rd index 1bf808bea..041751319 100644 --- a/man/fix_sc_end_dates.Rd +++ b/man/fix_sc_end_dates.Rd @@ -4,7 +4,7 @@ \alias{fix_sc_end_dates} \title{Fix sc end dates} \usage{ -fix_sc_end_dates(start_date, end_date, period) +fix_sc_end_dates(start_date, end_date, period_end_date) } \arguments{ \item{start_date}{A vector containing dates.} diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index 12d1d36bb..00b9ea52c 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/get_sandpit_extract_path.Rd b/man/get_sandpit_extract_path.Rd new file mode 100644 index 000000000..c938b45ea --- /dev/null +++ b/man/get_sandpit_extract_path.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_sandpit_extract_path.R +\name{get_sandpit_extract_path} +\alias{get_sandpit_extract_path} +\title{Sandpit Extract File Path} +\usage{ +get_sandpit_extract_path( + type = c("at", "ch", "hc", "sds", "client", "demographics"), + year = NULL, + update = latest_update(), + ... +) +} +\arguments{ +\item{update}{The update month to use, +defaults to \code{\link[=latest_update]{latest_update()}}} + +\item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}} +} +\value{ +The path to the sandpit extracts as an \code{\link[fs:path]{fs::path()}} +} +\description{ +Get the file path for sandpit extracts +} +\seealso{ +\code{\link[=get_file_path]{get_file_path()}} for the generic function. +} +\concept{social care sandpit extract paths} diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index 97a0f3639..e74bd5734 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -41,15 +41,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index f52305356..3d3b9544e 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -25,15 +25,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index 7bac9b6b3..2363df773 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/next_fy.Rd b/man/next_fy.Rd index 19e1193f4..7524c5f11 100644 --- a/man/next_fy.Rd +++ b/man/next_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/process_tests_acute.Rd b/man/process_tests_acute.Rd index ba6e28c37..ebf44ef2e 100644 --- a/man/process_tests_acute.Rd +++ b/man/process_tests_acute.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ Takes the processed Acute extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_ae.Rd b/man/process_tests_ae.Rd index 53d3cf83a..eb16ad7ea 100644 --- a/man/process_tests_ae.Rd +++ b/man/process_tests_ae.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed A&E extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_care_home.Rd b/man/process_tests_care_home.Rd index d6f4a04d6..323552062 100644 --- a/man/process_tests_care_home.Rd +++ b/man/process_tests_care_home.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed Care home extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_cmh.Rd b/man/process_tests_cmh.Rd index f9bbd1e9f..2dbb5bd1c 100644 --- a/man/process_tests_cmh.Rd +++ b/man/process_tests_cmh.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed CMH extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_delayed_discharges.Rd b/man/process_tests_delayed_discharges.Rd index 68e1b8f17..f900cdfe7 100644 --- a/man/process_tests_delayed_discharges.Rd +++ b/man/process_tests_delayed_discharges.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ Takes the processed Delayed Discharges extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_episode_file.Rd b/man/process_tests_episode_file.Rd index 6dbb881b7..2458db96f 100644 --- a/man/process_tests_episode_file.Rd +++ b/man/process_tests_episode_file.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ Takes the processed episode file and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_gp_ooh.Rd b/man/process_tests_gp_ooh.Rd index 48b05813e..f7543ef8f 100644 --- a/man/process_tests_gp_ooh.Rd +++ b/man/process_tests_gp_ooh.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed GP OOH extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_home_care.Rd b/man/process_tests_home_care.Rd index 1d6ee69bb..d922c27b2 100644 --- a/man/process_tests_home_care.Rd +++ b/man/process_tests_home_care.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed Home Care extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_individual_file.Rd b/man/process_tests_individual_file.Rd index 02b06f48d..8230bad1d 100644 --- a/man/process_tests_individual_file.Rd +++ b/man/process_tests_individual_file.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ Takes the processed individual file and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_it_chi_deaths.Rd b/man/process_tests_it_chi_deaths.Rd index 7094b1253..bffc4afc9 100644 --- a/man/process_tests_it_chi_deaths.Rd +++ b/man/process_tests_it_chi_deaths.Rd @@ -17,5 +17,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed acute extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_lookup_gpprac.Rd b/man/process_tests_lookup_gpprac.Rd index 114092d2e..d6863e7ec 100644 --- a/man/process_tests_lookup_gpprac.Rd +++ b/man/process_tests_lookup_gpprac.Rd @@ -17,5 +17,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed gpprac lookup and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_lookup_pc.Rd b/man/process_tests_lookup_pc.Rd index 6369dab1d..659b25c99 100644 --- a/man/process_tests_lookup_pc.Rd +++ b/man/process_tests_lookup_pc.Rd @@ -17,5 +17,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed acute extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_ltcs.Rd b/man/process_tests_ltcs.Rd index 85da43588..80ad28e82 100644 --- a/man/process_tests_ltcs.Rd +++ b/man/process_tests_ltcs.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed LTCs extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_maternity.Rd b/man/process_tests_maternity.Rd index 2c1658108..aa74766ba 100644 --- a/man/process_tests_maternity.Rd +++ b/man/process_tests_maternity.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed homelessness extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_mental_health.Rd b/man/process_tests_mental_health.Rd index e2038d356..a488d065a 100644 --- a/man/process_tests_mental_health.Rd +++ b/man/process_tests_mental_health.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed homelessness extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_nrs_deaths.Rd b/man/process_tests_nrs_deaths.Rd index 79e2f3e32..d47aa72b9 100644 --- a/man/process_tests_nrs_deaths.Rd +++ b/man/process_tests_nrs_deaths.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed NRS deaths extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_outpatients.Rd b/man/process_tests_outpatients.Rd index 288b89512..8c583e62b 100644 --- a/man/process_tests_outpatients.Rd +++ b/man/process_tests_outpatients.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed outpatients extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_prescribing.Rd b/man/process_tests_prescribing.Rd index 4ef73bcc8..a5c10a67b 100644 --- a/man/process_tests_prescribing.Rd +++ b/man/process_tests_prescribing.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed prescribing extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/process_tests_sds.Rd b/man/process_tests_sds.Rd index 2f6b7b0b7..e4bdeabf3 100644 --- a/man/process_tests_sds.Rd +++ b/man/process_tests_sds.Rd @@ -16,5 +16,5 @@ a \link[tibble:tibble-package]{tibble} containing a test comparison. } \description{ This script takes the processed SDS extract and produces -a test comparison with the previous data. This is written to disk as a CSV. +a test comparison with the previous data. This is written to disk as an xlsx. } diff --git a/man/start_fy.Rd b/man/start_fy.Rd index 4996bfb72..9951af2ec 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -27,8 +27,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd index f5729dcb0..9936736a8 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -26,8 +26,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index 098f0bf73..fdac297a7 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, -\code{\link{start_fy}()} +\code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()} } \concept{date functions} diff --git a/tests/testthat/test-get_lookup_paths.R b/tests/testthat/test-get_lookup_paths.R index c56752b03..29d538cc1 100644 --- a/tests/testthat/test-get_lookup_paths.R +++ b/tests/testthat/test-get_lookup_paths.R @@ -48,13 +48,11 @@ test_that("SIMD file path returns as expected", { test_that("population estimates file path returns as expected", { suppressMessages({ - expect_s3_class(get_datazone_pop_path(), "fs_path") + expect_s3_class(get_pop_path(type = "datazone"), "fs_path") - expect_equal(fs::path_ext(get_datazone_pop_path()), "rds") + expect_equal(fs::path_ext(get_pop_path(type = "datazone")), "rds") - expect_match(get_datazone_pop_path(), "DataZone2011_pop_est_2001_\\d+?") - - expect_true(fs::file_exists(get_datazone_pop_path())) + expect_true(fs::file_exists(get_pop_path(type = "datazone"))) }) })