diff --git a/data-raw/workflow/01_upload_cqc_data_from_api.R b/data-raw/workflow/01_upload_cqc_data_from_api.R
index 2834b50e..d55063ce 100644
--- a/data-raw/workflow/01_upload_cqc_data_from_api.R
+++ b/data-raw/workflow/01_upload_cqc_data_from_api.R
@@ -1,23 +1,210 @@
-# Function to get api content from url
-get_api_content <- function(url) {
-  # Get api data
-  data = httr::GET(url)
+# Get cqc primary key from environ file (empty string if CQC_PRIMARY_KEY unset)
+key = Sys.getenv("CQC_PRIMARY_KEY")
+
+# Function to get parsed json api content from url, authenticating with `key`
+get_api_content <- function(url){
+
+  # Get api data, sending the subscription key as a request header
+  data = httr::GET(url, httr::add_headers(`Ocp-Apim-Subscription-Key` = key))
+
   # Convert binary to character
   content = jsonlite::fromJSON(rawToChar(data$content))
-  
+
   # Return content
   return(content)
 }
 
-# Get number of cqc pages for main api query
-api_content <- get_api_content(
-  "https://api.cqc.org.uk/public/v1/locations?careHome=Y&page=1&perPage=1"
+# Get number of pages (totalPages field) reported by the locations endpoint
+get_number_of_pages = function(){
+
+  # Define url
+  url = "https://api.service.cqc.org.uk/public/v1/locations"
+
+  # Get locations overview
+  api_content = get_api_content(url)
+
+  # Get number of pages
+  total_pages = api_content$totalPages
+
+  # Return
+  return(total_pages)
+}
+
+# Get the location ids listed on one page (page_num), requested 1000 per page
+get_location_ids_per_page = function(page_num){
+
+  # Define url
+  url = paste0(
+    "https://api.service.cqc.org.uk/public/v1/locations?page=",
+    page_num,
+    "&perPage=1000"
+  )
+
+  # Get locations overview
+  api_content = get_api_content(url)
+
+  # Get locations ids
+  location_vec = api_content$locations$locationId
+
+  # Return
+  return(location_vec)
+}
+
+# Get one location's details, flattened, by index into global location_vec
+get_location_info_by_id <- function(loc_num) {
+
+  # Paste location url with location_id (trailing "/" separates path and id)
+  url = paste0(
+    "https://api.service.cqc.org.uk/public/v1/locations/",
+    location_vec[loc_num]
+  )
+
+  # Get data
+  data = get_api_content(url) %>%
+    unlist() %>%
+    bind_rows()
+
+  # Retry after a pause while response has <= 2 columns (presumably an error)
+  while (ncol(data) <= 2) {
+    Sys.sleep(0.05)
+    # NOTE(review): no retry limit - loops forever if the api keeps failing
+    data = get_api_content(url) %>%
+      unlist() %>%
+      bind_rows()
+  }
+
+  # Return data
+  return(data)
+}
+
+# Get total pages
+total_pages = get_number_of_pages()
+
+# Get all location ids (one vector of ids per page)
+location_vec = lapply(seq_len(total_pages), get_location_ids_per_page)
+
+# Unlist into a single vector
+location_vec = unlist(location_vec)
+
+
+# Get column names of the api response for location_vec[index]
+get_col_names = function(index){
+
+  # Create url
+  url = paste0(
+    "https://api.service.cqc.org.uk/public/v1/locations/",
+    location_vec[index]
+  )
+
+  # Get data
+  data = get_api_content(url)
+
+  # Column names
+  cols = names(data)
+
+  # Return
+  return(cols)
+}
+
+cols = lapply(seq_along(location_vec), get_col_names)
+
+# NOTE(review): scratch below; `data` is not assigned at top level above
+data$assessment
+
+  unlist() %>%
+  bind_rows()
+
+
+data$uprn
+
+a = get_location_info_by_id(48)
+
+cbind(
+  data["name"],
+  data$specialisms,
+  data$regulatedActivities,
+  data["locationId"]
 )
 
-# Get number of 10k blocks required
-no_of_pages = ceiling(api_content$total / 10000)
+
+names(data)
 
 
+
+
+
+
+
+
+data$assessment
+data$assessmentServiceGroup
+data$numberOfBeds
+
+data$type
+data$locationTypes
+
+cqc_cols = c(
+  'name',
+  'postalCode',
+  'uprn',
+  'locationId',
+  'providerId',
+  'organisationType',
+  'type',
+  'lastInspection',
+  'deregistrationDate',
+  'registrationStatus',
+  'registrationDate',
+  'postalAddressLine1',
+  'postalAddressLine2',
+  'postalAddressTownCity',
+  'postalAddressCounty',
+  'numberOfBeds',
+  'gacServicesTypes',
+  'gacServicesTypesNames',
+  'regulatedActivities',
+  'specialisms'
+
+)
+
+# NOTE(review): orphaned argument list (looks like mutate() args); commented out so the file parses
+# uprn = as.numeric(uprn),
+# location_id,
+# provider_id,
+# last_inspection_date,
+# registration_date,
+# deregistration_date,
+# single_line_address,
+# postcode = toupper(gsub("[^[:alnum:]]", "", postal_code)),
+# nursing_home_flag = as.integer(grepl(
+#   "Nursing home", gac_service_types_names
+# )),
+# residential_home_flag = as.integer(grepl(
+#   "Residential home", gac_service_types_names
+# )),
+# type,
+# number_of_beds = as.integer(number_of_beds),
+# current_rating = current_ratings_overall_rating,
+# key_question_names = current_ratings_overall_key_question_ratings_names,
+# key_question_ratings = current_ratings_overall_key_question_ratings_ratings,
+# cqc_date = download_date,
+# ods_code,
+# specialisms,
+# regulated_activities_names,
+# gac_service_types = gac_service_types_names,
+
+
+which
+
+c = data[names(data) %in% cqc_cols] %>%
+  unlist() %>%
+  bind_rows()
+c
+
+
+data$uprn
+
+
 
 
 get_cqc_locations_details <- function(page_num) {
diff --git a/data-raw/workflow/workflow_run_23_24.R b/data-raw/workflow/workflow_run_23_24.R
new file mode 100644
index 00000000..66cf611f
--- /dev/null
+++ b/data-raw/workflow/workflow_run_23_24.R
@@ -0,0 +1,47 @@
+# Load/install all required packages and functions
+source("data-raw/workflow/workflow_packages.R")
+source("data-raw/workflow/workflow_helpers.R")
+source("data-raw/workflow/workflow_production.R")
+
+# Specify variables to retain at end of each script
+keep_vars = c(ls(), 'keep_vars')
+
+# FY 23/24 (NOTE(review): steps 3-7 still use 22/23 tables/dates; confirm) ----
+
+# 1. Get latest cqc data: 0.5hr - Run once in first epoch script
+get_latest_cqc_data()
+
+# 2. Get latest ab plus epoch: ~2hr
+get_abp_from_api(
+  end_date = "2024-03-31"
+)
+
+# 3. Merge and process cqc and ab plus: ~3 mins
+create_ab_plus_cqc_data(
+  ab_plus_data = "INT646_ABP_20230331",
+  cqc_data = "INT646_CQC_20230602",
+  start_date = "2022-04-01",
+  end_date = "2023-03-31"
+)
+
+# 4. Create form level fact for records with a ch-postcode: ~11-14hr
+create_form_level_patient_addresses(
+  address_data = "INT646_ABP_CQC_20220401_20230331"
+)
+
+# 5. Match patient details against ch-postcode uprn and process: ~30-40 mins
+create_care_home_address_match(
+  patient_address_data = "INT646_FORMS_20220401_20230331",
+  lookup_address_data = "INT646_ABP_CQC_20220401_20230331",
+  parent_uprn_data = "INT646_ABP_20230331"
+)
+
+# 6. Create postcode lookup table (latest available mappings) for joining in the next step: ~5 min
+# create_postcode_lookup() # Run once in first epoch script
+
+
+# 7. Join to fact table and get non ch-postcode records within time frame: ~9 hrs
+create_matched_prescription_base_table(
+  match_data = "INT646_MATCH_20220401_20230331",
+  form_data = "INT646_FORMS_20220401_20230331"
+)