Skip to content

Commit

Permalink
2019 ipd release
Browse files Browse the repository at this point in the history
- Tract 42091206702 returns NoData for the IPD classification it was corrected by hard coding "Well Below Average"
- Adds new dependencies
  • Loading branch information
DVRPCfreight committed Sep 5, 2023
1 parent dfe6c78 commit 71a69c0
Showing 1 changed file with 32 additions and 24 deletions.
56 changes: 32 additions & 24 deletions ipd.r
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
## SETUP
# Dependencies
library(plyr); library(here); library(sf); library(summarytools);
library(tidycensus); library(tidyverse); library(tigris)
library(tidycensus); library(tidyverse); library(tigris); library(dplyr); library(descr)

# Census API Key
census_api_key("Insert API Key Here", overwrite = TRUE)
census_api_key("API Key Goes Here", overwrite = TRUE)

# Fields
# See https://www.census.gov/data/developers/data-sets/acs-5year.html
Expand Down Expand Up @@ -37,11 +37,12 @@ youth_universe <- "B03002_001"
youth_count <- "B09001_001"
youth_percent <- NA

ipd_year <- 2018
ipd_year <- 2019
ipd_states <- c("NJ", "PA")
ipd_counties <- c("34005", "34007", "34015", "34021", "42017", "42029", "42045", "42091", "42101")

# Functions

min <- function(i, ..., na.rm = TRUE) {
base::min(i, ..., na.rm = na.rm)
}
Expand Down Expand Up @@ -81,8 +82,8 @@ move_last <- function(df, last_col) {
}

description <- function(i) {
des <- as.numeric(descr(i, na.rm = TRUE,
stats = c("min", "med", "mean", "sd", "max")))
des <- as.numeric(summarytools::descr(i, na.rm = TRUE,
stats = c("min", "med", "mean", "sd", "max")))
des <- c(des[1:4], des[4] / 2, des[5])
return(des)
}
Expand All @@ -104,12 +105,12 @@ for (i in 1:length(ipd_states)){
ipd_year,
"/data/5-year/140/B02001_",
ipd_states_numeric[i],
".csv.gz")
".csv.zip")
temp <- tempfile()
download.file(url, temp)
var_rep_i <- read_csv(gzfile(temp))
var_rep <- rbind(var_rep, var_rep_i)
}
var_rep_i <- read_csv(unzip(temp))
var_rep <- dplyr::bind_rows(var_rep, var_rep_i)
}

var_rep <- var_rep %>%
mutate_at(vars(GEOID), ~(str_sub(., 8, 18))) %>%
Expand Down Expand Up @@ -353,10 +354,10 @@ dl_percs <- dl_percs %>% filter(!(GEOID10 %in% slicer))
# Sort column names for consistency
# `comp` = "component parts"
comp <- list()
comp$uni_est <- dl_counts %>% select(ends_with("UE")) %>% select(sort(current_vars()))
comp$uni_moe <- dl_counts %>% select(ends_with("UM")) %>% select(sort(current_vars()))
comp$count_est <- dl_counts %>% select(ends_with("CE")) %>% select(sort(current_vars()))
comp$count_moe <- dl_counts %>% select(ends_with("CM")) %>% select(sort(current_vars()))
comp$uni_est <- dl_counts %>% select(ends_with("UE")) %>% select(sort(tidyselect::peek_vars()))
comp$uni_moe <- dl_counts %>% select(ends_with("UM")) %>% select(sort(tidyselect::peek_vars()))
comp$count_est <- dl_counts %>% select(ends_with("CE")) %>% select(sort(tidyselect::peek_vars()))
comp$count_moe <- dl_counts %>% select(ends_with("CM")) %>% select(sort(tidyselect::peek_vars()))

# Compute percentages and associated MOEs
pct_matrix <- NULL
Expand Down Expand Up @@ -400,7 +401,7 @@ pct_moe <- pct_moe %>% mutate(D_PctMOE = dl_percs$D_PM,
# Compute percentile

# Add percentages to `comp`; sort column names for consistency
comp$pct_est <- pct %>% select(sort(current_vars()))
comp$pct_est <- pct %>% select(sort(tidyselect::peek_vars()))
percentile_matrix <- NULL

for (c in 1:length(comp$uni_est)){
Expand Down Expand Up @@ -470,24 +471,25 @@ ipd <- ipd %>% mutate(STATEFP10 = str_sub(GEOID10, 1, 2),
select(-ends_with("UE"), -ends_with("UM"))

# Reorder columns
ipd <- ipd %>% select(GEOID10, STATEFP10, COUNTYFP10, NAME10, sort(current_vars())) %>%
ipd <- ipd %>% select(GEOID10, STATEFP10, COUNTYFP10, NAME10, sort(tidyselect::peek_vars())) %>%
select(move_last(., c("IPD_Score", "U_TPopEst", "U_TPopMOE",
"U_Pop6Est", "U_Pop6MOE", "U_PPovEst",
"U_PPovMOE", "U_PNICEst", "U_PNICMOE")))

# Replace NA with NoData if character and 0 if numeric
ipd <- ipd %>% mutate_if(is.character, ~(ifelse(is.na(.), "NoData", .))) %>%
mutate_if(is.numeric, ~(ifelse(is.na(.), 0, .)))

# Append low-population tracts back onto dataset
slicer <- enframe(slicer, name = NULL, value = "GEOID10")
ipd <- plyr::rbind.fill(ipd, slicer)

# Replace NA with NoData if character and -99999 if numeric
ipd <- ipd %>% mutate_if(is.character, ~(ifelse(is.na(.), "NoData", .))) %>%
mutate_if(is.numeric, ~(ifelse(is.na(.), -99999, .)))

## SUMMARY TABLES

# Replace -99999 with NA for our purposes
# Replace 0 with NA for our purposes
ipd_summary <- ipd
ipd_summary[ipd_summary == -99999] <- NA
ipd_summary[ipd_summary == 0]

# Count of tracts that fall in each bin
counts <- ipd_summary %>% select(ends_with("Class"))
Expand Down Expand Up @@ -523,19 +525,20 @@ breaks <- ipd_summary %>% select(ends_with("PctEst"))
export_breaks <- round(mapply(st_dev_breaks, x = breaks, i = 5, na.rm = TRUE), digits = 3)
export_breaks <- as_tibble(export_breaks) %>%
mutate(Class = c("Min", "1", "2", "3", "4", "Max")) %>%
select(Class, current_vars())
select(Class, tidyselect::peek_vars())

# Minimum, median, mean, standard deviation, maximum
pcts <- ipd_summary %>% select(ends_with("PctEst"))
summary_data <- apply(pcts, 2, description)

summary_data <- apply(pcts, MARGIN=2, description)
export_summary <- as_tibble(summary_data) %>%
mutate_all(round_2) %>%
mutate(Statistic = c("Minimum", "Median", "Mean", "SD", "Half-SD", "Maximum")) %>%
select(Statistic, current_vars())
select(Statistic, tidyselect::peek_vars())

# Population-weighted county means for each indicator
export_means <- dl_counts %>% select(GEOID10, ends_with("UE"), ends_with("CE")) %>%
select(GEOID10, sort(current_vars())) %>%
select(GEOID10, sort(tidyselect::peek_vars())) %>%
mutate(County = str_sub(GEOID10, 1, 5)) %>%
select(-GEOID10) %>%
group_by(County) %>%
Expand All @@ -551,6 +554,11 @@ export_means <- dl_counts %>% select(GEOID10, ends_with("UE"), ends_with("CE"))
mutate_if(is.numeric, ~ . * 100) %>%
mutate_if(is.numeric, round_1)

# Replace NA with NoData if character and -99999 if numeric
# so tract 42091206702 doesn't mess up breaks and means by indicator
ipd <- ipd %>% mutate_if(is.character, ~(ifelse(is.na(.), "NoData", .))) %>%
mutate_if(is.numeric, ~(ifelse(is.na(.), -99999, .)))
ipd_summary[ipd_summary == -99999] <- NA

## EXPORT

Expand Down

0 comments on commit 71a69c0

Please sign in to comment.