Skip to content

Commit

Permalink
cqc funs
Browse files Browse the repository at this point in the history
  • Loading branch information
AdnanShroufi committed Jun 20, 2024
1 parent d17ee8d commit 8ada08e
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 10 deletions.
207 changes: 197 additions & 10 deletions data-raw/workflow/01_upload_cqc_data_from_api.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,210 @@
# Function to get api content from url
get_api_content <- function(url) {

# Get api data
data = httr::GET(url)

# Get cqc primary key from environ file
key = Sys.getenv("CQC_PRIMARY_KEY")

# Fetch a CQC API url and return the parsed JSON body.
#
# url: full request url (character scalar).
# Returns whatever jsonlite::fromJSON() produces for the response body.
# Reads the subscription key from the variable `key`, which is not a
# parameter and must already exist in an enclosing environment.
get_api_content <- function(url){

  # Send the request with the subscription key attached as a header
  response <- httr::GET(
    url,
    httr::add_headers(`Ocp-Apim-Subscription-Key` = key)
  )

  # The body arrives as raw bytes: decode to text first, then parse as JSON
  body_text <- rawToChar(response$content)
  jsonlite::fromJSON(body_text)
}

# Get number of cqc pages for main api query
api_content <- get_api_content(
"https://api.cqc.org.uk/public/v1/locations?careHome=Y&page=1&perPage=1"
# Get the number of result pages reported by the locations endpoint.
#
# Takes no arguments. Returns the `totalPages` field of the parsed response.
# The request asks for the same page size (perPage=1000) that
# get_location_ids_per_page() uses when it downloads each page, so the page
# count and the pages actually fetched refer to the same pagination.
# NOTE(review): presumably `totalPages` varies with `perPage` - confirm
# against the API documentation.
get_number_of_pages <- function(){

  # Define url, pinning the page size used by the per-page download
  url <- "https://api.service.cqc.org.uk/public/v1/locations?page=1&perPage=1000"

  # Get locations overview
  api_content <- get_api_content(url)

  # Number of pages as reported by the API
  api_content$totalPages
}

# Get the location ids listed on one page of the locations endpoint.
#
# page_num: page index, pasted into the `page` query parameter.
# Returns the `locationId` field of the `locations` element of the response
# (pages are requested with 1000 records each).
get_location_ids_per_page <- function(page_num){

  # Build the paged request url
  page_url <- paste0(
    "https://api.service.cqc.org.uk/public/v1/locations?page=",
    page_num,
    "&perPage=1000"
  )

  # Fetch the page and pull out the ids in one step
  get_api_content(page_url)$locations$locationId
}

# Get the full record for one location, addressed by its position in
# `location_vec`.
#
# loc_num: index into `location_vec`, which is not a parameter and must
#   already exist in an enclosing environment.
# Returns the flattened response as produced by unlist() %>% bind_rows().
get_location_info_by_id <- function(loc_num) {

  # Paste location url with location_id. The trailing slash is required so
  # the id becomes its own path segment (".../locations/<id>"), matching the
  # url built in get_col_names().
  url <- paste0(
    "https://api.service.cqc.org.uk/public/v1/locations/",
    location_vec[loc_num]
  )

  # Fetch the record and flatten the nested list into a single data frame
  fetch_location <- function() {
    get_api_content(url) %>%
      unlist() %>%
      bind_rows()
  }

  data <- fetch_location()

  # Retry while the result has 2 or fewer columns (presumably an error or
  # throttling payload rather than a real record - TODO confirm).
  # NOTE(review): the loop has no upper bound, so a permanently failing url
  # would retry forever.
  while (ncol(data) <= 2) {
    Sys.sleep(0.05)
    data <- fetch_location()
  }

  # Return data
  data
}

# Get total pages
total_pages <- get_number_of_pages()

# Get all location ids: one element per page. seq_len() yields an empty
# sequence (rather than c(1, 0)) when there are no pages.
all_locations <- lapply(seq_len(total_pages), get_location_ids_per_page)

# Unlist the per-page results into a single vector of location ids
location_vec <- unlist(all_locations)


# Get the top-level field names returned for one location.
#
# index: position in `location_vec` (read from an enclosing environment) of
#   the location id to query.
# Returns names() of the parsed response for that location.
get_col_names <- function(index){

  # Address of the single-location endpoint for the chosen id
  location_url <- paste0(
    "https://api.service.cqc.org.uk/public/v1/locations/",
    location_vec[index]
  )

  # Names of the parsed response
  names(get_api_content(location_url))
}

# Field names for every location id (one API request per id). seq_along()
# gives an empty sequence when location_vec is empty, where 1:length() would
# iterate over c(1, 0).
cols <- lapply(seq_along(location_vec), get_col_names)


data$assessment

unlist() %>%
bind_rows()


data$uprn

a = get_location_info_by_id(48)

cbind(
data["name"],
data$specialisms,
data$regulatedActivities,
data["locationId"]
)

# Get number of 10k blocks required
no_of_pages = ceiling(api_content$total / 10000)

names(data)






data$assessment
data$assessmentServiceGroup
data$numberOfBeds

data$type
data$locationTypes

# Names of the location fields to keep from an API response; used further
# down to subset a response via names(data) %in% cqc_cols.
# The vector must not end with a trailing comma: c() treats it as an empty
# argument and raises an error.
# NOTE(review): other code in this file refers to gac_service_types_names
# (no "s" after "Service") - confirm the 'gacServicesTypes*' spellings
# against the field names the API actually returns.
cqc_cols <- c(
  'name',
  'postalCode',
  'uprn',
  'locationId',
  'providerId',
  'organisationType',
  'type',
  'lastInspection',
  'deregistrationDate',
  'registrationStatus',
  'registrationDate',
  'postalAddressLine1',
  'postalAddressLine2',
  'postalAddressTownCity',
  'postalAddressCounty',
  'numberOfBeds',
  'gacServicesTypes',
  'gacServicesTypesNames',
  'regulatedActivities',
  'specialisms'
)


uprn = as.numeric(uprn),
location_id,
provider_id,
last_inspection_date,
registration_date,
deregistration_date,
single_line_address,
postcode = toupper(gsub("[^[:alnum:]]", "", postal_code)),
nursing_home_flag = as.integer(grepl(
"Nursing home", gac_service_types_names
)),
residential_home_flag = as.integer(grepl(
"Residential home", gac_service_types_names
)),
# type,
number_of_beds = as.integer(number_of_beds),
current_rating = current_ratings_overall_rating,
key_question_names = current_ratings_overall_key_question_ratings_names,
key_question_ratings = current_ratings_overall_key_question_ratings_ratings,
cqc_date = download_date,
ods_code,
specialisms,
regulated_activities_names,
gac_service_types = gac_service_types_names,


which

c = data[names(data) %in% cqc_cols] %>%
unlist() %>%
bind_rows()
c


data$uprn



get_cqc_locations_details <- function(page_num) {

Expand Down
47 changes: 47 additions & 0 deletions data-raw/workflow/workflow_run_23_24.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Workflow driver: sources the shared workflow scripts, then runs the
# numbered pipeline steps below in order. Each step is a call to a function
# defined in the sourced files; the run times quoted are the authors'
# estimates.

# Load/install all required packages and functions
source("data-raw/workflow/workflow_packages.R")
source("data-raw/workflow/workflow_helpers.R")
source("data-raw/workflow/workflow_production.R")

# Specify variables to retain at end of each script
# (everything defined at this point, plus keep_vars itself)
keep_vars = c(ls(), 'keep_vars')

# FY 22/23 ---------------------------------------------------------------------
# NOTE(review): this file is named workflow_run_23_24.R and step 2 uses
# end_date "2024-03-31", but this header and every other step reference
# 2022-04-01 to 2023-03-31 tables and dates - confirm which financial year
# is intended and update the table names/dates accordingly.

# 1. Get latest cqc data: 0.5hr - Run once in first epoch script
get_latest_cqc_data()

# 2. Get latest ab plus epoch: ~2hr
get_abp_from_api(
end_date = "2024-03-31"
)

# 3. Merge and process cqc and ab plus: ~3 mins
# NOTE(review): the ab plus table name ends 20230331 whereas step 2 requests
# an end_date of 2024-03-31 - confirm these are meant to differ.
create_ab_plus_cqc_data(
ab_plus_data = "INT646_ABP_20230331",
cqc_data = "INT646_CQC_20230602",
start_date = "2022-04-01",
end_date = "2023-03-31"
)

# 4. Create form level fact for records with a ch-postcode: ~11-14hr
create_form_level_patient_addresses(
address_data = "INT646_ABP_CQC_20220401_20230331"
)

# 5. Match patient details against ch-postcode uprn and process: ~30-40 mins
create_care_home_address_match(
patient_address_data = "INT646_FORMS_20220401_20230331",
lookup_address_data = "INT646_ABP_CQC_20220401_20230331",
parent_uprn_data = "INT646_ABP_20230331"
)

# 6. Create postcode lookup table (latest available mappings) for joining in the next step: ~5 min
# Left commented out here because it is only run in the first epoch script.
# create_postcode_lookup() # Run once in first epoch script


# 7. Join to fact table and get non ch-postcode records within time frame: ~9 hrs
create_matched_prescription_base_table(
match_data = "INT646_MATCH_20220401_20230331",
form_data = "INT646_FORMS_20220401_20230331"
)

0 comments on commit 8ada08e

Please sign in to comment.