diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000..8477f44
--- /dev/null
+++ b/.nojekyll
@@ -0,0 +1 @@
+f50c4fc0
\ No newline at end of file
diff --git a/assets/permits_animation.gif b/assets/permits_animation.gif
new file mode 100644
index 0000000..911fe1f
Binary files /dev/null and b/assets/permits_animation.gif differ
diff --git a/final.html b/final.html
new file mode 100644
index 0000000..352077f
--- /dev/null
+++ b/final.html
@@ -0,0 +1,9670 @@
Predicting New Construction in Philadelphia
MUSA 508 Final Project

Author: Laura Frances and Nissim Lebovits
Published: December 3, 2023
+ + +
+ +
+

1 Summary

+
+
+

2 Introduction

+
+
+Show the code +
required_packages <- c("tidyverse", "sf", "acs", "tidycensus", "sfdep", "kableExtra", "conflicted",
+                       "gganimate", "tmap", "gifski", "transformr", "ggpubr", "randomForest", "janitor",
+                       'igraph')
+install_and_load_packages(required_packages)
+
+source("utils/viz_utils.R")
+
+select <- dplyr::select
+filter <- dplyr::filter
+lag <- dplyr::lag
+
+options(scipen = 999, tigris_use_cache = TRUE, tigris_class = 'sf')
+
+crs <- 'epsg:2272'
+
+
+
  1. Predict development pressure: how do we define “a lot of development”?
  2. Define affordability burden: how do we define “affordability burden”? – e.g., the year-over-year % change in the population experiencing rent burden (we will probably see extreme tipping points), population growth, and the % change in area incomes
  3. Identify problem zoning
  4. Calculate the number of connected parcels
  5. Predict development pressure at the block level
  6. Identify areas that are not affordability-burdened
  7. Identify problem zoning
  8. Calculate the number of connected parcels
  9. Advocate for upzoning in parcels where there is local development pressure, no affordability burden, problem zoning, and a high number of connected parcels (a minimal sketch of this screening step follows the lists below)

  Supporting spatial data:
  • transit
  • zoning (OSM)
  • land use (OSM)
+
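The screening logic described above can be expressed compactly in R. The snippet below is only an illustration: the object `parcels` and the columns `predicted_permits`, `rent_burden_change`, `zoning_code`, and `n_contig` are hypothetical placeholders for features built later in this report, and the cutoffs are arbitrary.
+# Illustrative sketch of the parcel-screening workflow described above.
+# `parcels` and its columns are hypothetical placeholders; thresholds are arbitrary.
+library(dplyr)
+
+upzoning_candidates <- parcels %>%
+  mutate(
+    dev_pressure   = predicted_permits > 10,         # stand-in for "a lot of development"
+    low_burden     = rent_burden_change <= 0,        # no growing affordability burden
+    problem_zoning = grepl("^(RS|I)", zoning_code),  # single-family residential or industrial
+    assemblable    = n_contig > 2                    # several connected parcels
+  ) %>%
+  filter(dev_pressure, low_burden, problem_zoning, assemblable)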
+
+Show the code +
urls <- c(
+  phl = "https://opendata.arcgis.com/datasets/405ec3da942d4e20869d4e1449a2be48_0.geojson",
+  phl_bgs = "https://opendata.arcgis.com/datasets/2f982bada233478ea0100528227febce_0.geojson",
+  nbhoods = "https://raw.githubusercontent.com/opendataphilly/open-geo-data/master/Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.geojson",
+  roads = 'https://opendata.arcgis.com/datasets/261eeb49dfd44ccb8a4b6a0af830fdc8_0.geojson',
+  historic_districts = "https://phl.carto.com/api/v2/sql?q=SELECT+*+FROM+historicdistricts_local&filename=historicdistricts_local&format=geojson&skipfields=cartodb_id",
+  zoning = "https://opendata.arcgis.com/datasets/0bdb0b5f13774c03abf8dc2f1aa01693_0.geojson",
+  overlays = "https://opendata.arcgis.com/datasets/04fd29a8c022471994900cb0fd791bfc_0.geojson",
+  opa = "data/opa_properties.geojson",
+  building_permits = building_permits_path,
+  acs14 = acs_vars14_path,
+  acs19 = acs_vars19_path,
+  trolley_stops = "data/Trolley_Stations.geojson",
+  subway_stops = "data/Highspeed_Stations.geojson"
+)
+
+suppressMessages({
+  invisible(
+    imap(urls, ~ assign(.y, phl_spat_read(.x), envir = .GlobalEnv))
+  )
+})
+
+phl_bgs <- phl_bgs %>%
+              select(GEOID10)
+
+broad_and_market <- roads %>% filter(ST_NAME %in% c('BROAD',"MARKET") | SEG_ID %in% c(440370, 421347,421338,421337,422413,423051,440403,440402,440391,440380))
+
+subway_stops <- subway_stops[phl, ]
+transit_stops <- st_union(trolley_stops, subway_stops)
+
+historic_districts <- historic_districts %>%
+                        mutate(hist_dist = name) %>%
+                        select(hist_dist)
+
+nbhoods <- nbhoods %>%
+            select(mapname)
+
+overlays <- overlays %>% clean_names()
+overlays$overlay_symbol <- gsub("/", "", overlays$overlay_symbol) 
+overlays <- overlays %>%
+              mutate(overlay_symbol = ifelse(overlay_symbol == "[NA]", "Other", overlay_symbol))
+                        
+building_permits <- building_permits %>%
+                      filter(permittype %in% c("RESIDENTIAL BUILDING", "BP_ADDITION", "BP_NEWCNST"))
+
+acs_reg_vars <- c(
+  "GEOID",
+  "Total_Pop", 
+  "Med_Inc",
+  "Percent_Nonwhite",
+  "Percent_Renters",
+  "Rent_Burden",
+  "Ext_Rent_Burden"
+  )
+
+acs14_reg_vars <- phl_spat_read(acs_vars14_path) %>%
+                    st_drop_geometry() %>%
+                    select(acs_reg_vars) %>%
+                    clean_names() %>%
+                    rename(GEOID10 = geoid)
+
+acs19_reg_vars <- phl_spat_read(acs_vars19_path) %>%
+                    st_drop_geometry() %>%
+                    select(acs_reg_vars) %>%
+                    clean_names() %>%
+                    rename(GEOID10 = geoid)
+
+acs_tot <- acs14_reg_vars %>%
+             left_join(acs19_reg_vars, by = "GEOID10", suffix = c("_14", "_19")) %>%
+             mutate(pct_med_inc_change = (med_inc_19 - med_inc_14) / med_inc_14,
+                    pct_tot_pop_change = (total_pop_19 - total_pop_14) / total_pop_14,
+                    pct_renters_change = percent_renters_19 - percent_renters_14,
+                    pct_rent_burden_change = rent_burden_19 - rent_burden_14)
+
+
+
+
+Show the code +
ggplot(phl_bgs) +
+  geom_sf() +
+  theme_void()
+
+
+

+
+
+
+
+Show the code +
phl_bgs <- phl_spat_read("https://opendata.arcgis.com/datasets/2f982bada233478ea0100528227febce_0.geojson") %>%
+              select(GEOID10)
+
+# Create a complete grid of GEOID10 and year
+geoid_years <- expand.grid(GEOID10 = unique(phl_bgs$GEOID10),
+                           year = unique(building_permits$year))
+
+
+
+# Joining your existing data with the complete grid
+permits_bg <- st_join(phl_bgs, building_permits) %>%
+              group_by(GEOID10, year) %>%
+              summarize(permits_count = sum(permits_count, na.rm = TRUE)) %>%
+              st_drop_geometry() %>%
+              right_join(geoid_years, by = c("GEOID10", "year")) %>%
+              replace_na(list(permits_count = 0)) %>%
+              left_join(phl_bgs, by = "GEOID10") %>%
+              st_as_sf() 
+
+
+### spat + temp lags----------------------------------
+suppressMessages(
+permits_bg <- permits_bg %>%
+              group_by(year) %>%
+              mutate(nb = st_knn(geometry, 5),
+                         wt = st_weights(nb),
+                      lag_spatial = st_lag(permits_count, nb, wt)) %>% # calculate spat lag
+              ungroup() %>%
+              arrange(GEOID10, year) %>%
+              group_by(GEOID10) %>% # calculate time lags within each block group
+               mutate(
+                 lag_1_year = lag(permits_count, 1),
+                 lag_2_years = lag(permits_count, 2),
+                 lag_3_years = lag(permits_count, 3),
+                 lag_4_years = lag(permits_count, 4),
+                 lag_5_years = lag(permits_count, 5),
+                 lag_6_years = lag(permits_count, 6),
+                 lag_7_years = lag(permits_count, 7),
+                 lag_8_years = lag(permits_count, 8),
+                 lag_9_years = lag(permits_count, 9),
+                 dist_to_2022 = 2022 - year
+               ) %>%
+              ungroup()
+)
+
+### distance to transit---------------------------------
+phl_bgs <- phl_bgs %>%
+  rowwise() %>%
+  mutate(
+    dist_to_transit = as.numeric(min(st_distance(st_point_on_surface(geometry), transit_stops$geometry)))
+  ) %>%
+  ungroup()
+
+### historic dists---------------------------------
+hist_dists_x_bg <- st_join(phl_bgs, historic_districts) %>%
+              mutate(hist_dist = ifelse(is.na(hist_dist), 0, 1))
+
+hist_dists_x_bg <- st_join(phl_bgs, historic_districts) %>%
+                      st_drop_geometry() %>%
+                      mutate(hist_dist_present = 1) %>%
+                      group_by(GEOID10, hist_dist) %>%
+                      summarize(hist_dist_present = max(hist_dist_present, na.rm = TRUE), .groups = 'drop') %>%
+                      pivot_wider(names_from = "hist_dist", values_from = "hist_dist_present", 
+                                  names_prefix = "hist_dist_", values_fill = list("hist_dist_present" = 0))
+
+phl_bgs <- left_join(phl_bgs, hist_dists_x_bg, by = "GEOID10")
+
+### hoods---------------------------------              
+intersection <- st_intersection(phl_bgs, nbhoods)
+intersection$intersection_area <- st_area(intersection)
+max_intersection <- intersection %>%
+  group_by(GEOID10) %>%
+  filter(intersection_area == max(intersection_area)) %>%
+  ungroup() %>%
+  select(GEOID10, mapname) %>%
+  st_drop_geometry()
+
+phl_bgs <- left_join(phl_bgs, max_intersection, by = "GEOID10") # attach each block group's dominant neighborhood name
+
+### overlays---------------------------------
+overlays_x_bg <- st_join(phl_bgs, overlays %>% select(overlay_symbol)) %>%
+                      st_drop_geometry() %>%
+                      mutate(overlay_present = 1) %>%
+                      group_by(GEOID10, overlay_symbol) %>%
+                      summarize(overlay_present = max(overlay_present, na.rm = TRUE), .groups = 'drop') %>%
+                      pivot_wider(names_from = "overlay_symbol", values_from = "overlay_present", 
+                                  names_prefix = "overlay_", values_fill = list("overlay_present" = 0))
+
+phl_bgs <- left_join(phl_bgs, overlays_x_bg, by = "GEOID10")
+
+
+### join back together----------------------
+permits_bg <- left_join(permits_bg,
+                     st_drop_geometry(phl_bgs),
+                     by = "GEOID10")
+
+### acs vars--------------------------------------------
+
+permits_pre_2019 <- filter(permits_bg, year < 2019)
+permits_post_2018 <- filter(permits_bg, year >= 2019)
+
+
+permits_joined_pre_2019 <- left_join(permits_pre_2019, acs14_reg_vars, by = "GEOID10")
+permits_joined_post_2018 <- left_join(permits_post_2018, acs19_reg_vars, by = "GEOID10")
+
+
+permits_bg <- bind_rows(permits_joined_pre_2019, permits_joined_post_2018)
+
+### clean-----------------------------------------------------
+permits_bg <- permits_bg %>%
+                select(-c(nb, wt, GEOID10)) %>%
+                clean_names()
+
+
+
+
+Show the code +
tm <- tm_shape(permits_bg %>% filter(year != 2012)) +
+        tm_polygons(col = "permits_count", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+  tm_facets(along = "year") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+suppressMessages(
+tmap_animation(tm, "assets/permits_animation.gif", delay = 50)
+)
+
+
+
+
+

+
Philadelphia Building Permits, 2013 - 2022
+
+
+
+
+

3 Methods

+
+

3.1 Data

+
+
+Show the code +
ggplot(building_permits, aes(x = as.factor(year))) +
+  geom_bar() +
+  labs(title = "Permits per Year")
+
+
+

+
+
+Show the code +
ggplot(permits_bg %>% st_drop_geometry, aes(x = permits_count)) +
+  geom_histogram() +
+  labs(title = "Permits per Block Group per Year",
+       subtitle = "Log-Transformed") +
+  scale_x_log10() +
+  facet_wrap(~year)
+
+
+

+
+
+Show the code +
# 
+# tm_shape(permits_bg) +
+#   tm_polygons("spat_lag_permits", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+#   tm_facets(along = "year") +
+# tm_shape(broad_and_market) +
+#   tm_lines(col = "lightgrey") +
+#   tm_layout(frame = FALSE)
+
+
+
+

3.1.1 Feature Engineering

+
+
+Show the code +
permits_bg_long <- permits_bg %>%
+                    st_drop_geometry() %>%
+                    pivot_longer(
+                      cols = c(starts_with("lag"), dist_to_2022),
+                      names_to = "Variable",
+                      values_to = "Value"
+                    )
+
+
+ggscatter(permits_bg_long, x = "permits_count", y = "Value", facet.by = "Variable",
+   add = "reg.line",
+   add.params = list(color = "blue", fill = "lightgray"),
+   conf.int = TRUE
+   ) + stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01)
+
+
+

+
+
+
+
+
+

3.2 Models

+
+
+
+

4 Results

+
+

4.1 OLS Regression

+

To begin, we run a simple OLS regression incorporating three groups of engineered features: the spatial lag of permits, temporal lags of permits, and distance (in years) to 2022. We include this last variable because a Philadelphia tax abatement policy led to a significant increase in residential development in the years immediately before 2022. We will use this as a baseline against which to compare our more complex models.

+
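For reference, that baseline could also be written out explicitly rather than with the `permits_count ~ .` specification used in the chunk below; this is only a sketch, using the lag variables engineered earlier and assuming the `permits_train` split created in the next chunk.
+# Sketch: a baseline restricted to the three engineered feature groups named above
+# (spatial lag, temporal lags, distance to 2022); assumes `permits_train` from the next chunk.
+baseline_formula <- permits_count ~ lag_spatial +
+  lag_1_year + lag_2_years + lag_3_years + dist_to_2022
+
+baseline_ols <- lm(baseline_formula, data = st_drop_geometry(permits_train))
+summary(baseline_ols)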
+
+Show the code +
permits_train <- filter(permits_bg %>% select(-mapname), year < 2022)
+permits_test <- filter(permits_bg %>% select(-mapname), year == 2022)
+
+reg <- lm(permits_count ~ ., data = st_drop_geometry(permits_train))
+
+predictions <- predict(reg, permits_test)
+predictions <- cbind(permits_test, predictions)
+
+predictions <- predictions %>%
+                  mutate(abs_error = abs(permits_count - predictions),
+                         pct_error = abs_error / permits_count)
+
+ggplot(predictions, aes(x = permits_count, y = predictions)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
+
+
+

+
+
+Show the code +
mae <- paste0("MAE: ", round(mean(predictions$abs_error, na.rm = TRUE), 2))
+
+tm_shape(predictions) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+
+

+
+
+
+
+

4.2 Poisson Model

+

Given that we are dealing with a skewed distribution of count data, we next fit a Poisson regression, which models the permit counts directly rather than treating them as a continuous outcome.

+
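A Poisson model assumes the conditional variance of the outcome is roughly equal to its mean, so a quick dispersion check is worth running before trusting the fit. The sketch below (using the training split defined earlier) is one way to do this; substantial overdispersion would point toward a quasi-Poisson or negative binomial model instead.
+# Sketch: compare the raw mean and variance of permit counts; a variance far above
+# the mean suggests overdispersion relative to the Poisson assumption.
+with(st_drop_geometry(permits_train),
+     c(mean = mean(permits_count, na.rm = TRUE),
+       variance = var(permits_count, na.rm = TRUE)))
+
+# After fitting `poisson_reg` (next chunk), a Pearson dispersion ratio well above 1
+# points the same way:
+# sum(residuals(poisson_reg, type = "pearson")^2) / poisson_reg$df.residual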
+
+Show the code +
poisson_reg <- glm(permits_count ~ ., 
+                   family = poisson(link = "log"),
+                   data = st_drop_geometry(permits_train))
+
+poisson_predictions <- predict(poisson_reg, permits_test, type = "response")
+poisson_predictions <- cbind(permits_test, poisson_predictions)
+
+poisson_predictions <- poisson_predictions %>%
+                           mutate(abs_error = abs(permits_count - poisson_predictions),
+                                  pct_error = abs_error / permits_count)
+
+ggplot(poisson_predictions, aes(x = permits_count, y = poisson_predictions)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
+
+
+

+
+
+Show the code +
poisson_mae <- paste0("MAE: ", round(mean(poisson_predictions$abs_error, na.rm = TRUE), 2))
+
+tm_shape(poisson_predictions) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
+
+
+

+
+
+

We find that our OLS model has an MAE of only 2.44, which is not bad for such a simple model. Still, it struggles most in the areas where we most need it to succeed, so we will try to introduce better variables and apply a more complex model to improve our predictions.

+
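A compact way to compare the candidate models is to line their MAEs up side by side; the sketch below assumes the `mae`, `poisson_mae`, and `rf_mae` strings computed in this report, the last of which is defined in the random forest section that follows.
+# Sketch: gather the MAE strings computed in this report into one comparison table.
+# Assumes `rf_mae` from the random forest section below is available.
+tibble::tibble(
+  model = c("OLS", "Poisson", "Random forest"),
+  mae   = c(mae, poisson_mae, rf_mae)
+)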
+
+

4.3 Random Forest Regression

+
+
+Show the code +
rf <- randomForest(permits_count ~ ., 
+                   data = st_drop_geometry(permits_train),
+                   importance = TRUE, 
+                   na.action = na.omit)
+
+rf_predictions <- predict(rf, permits_test)
+rf_predictions <- cbind(permits_test, rf_predictions)
+rf_predictions <- rf_predictions %>%
+                  mutate(abs_error = abs(permits_count - rf_predictions),
+                         pct_error = abs_error / (permits_count + 0.0001))
+
+ggplot(rf_predictions, aes(x = permits_count, y = rf_predictions)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
+
+
+

+
+
+Show the code +
rf_mae <- paste0("MAE: ", round(mean(rf_predictions$abs_error, na.rm = TRUE), 2))
+
+tm_shape(rf_predictions) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+
+

+
+
+
+
+
+

5 Discussion

+
+

5.1 Accuracy

+

Predominantly, our model overpredicts, which is preferable to underpredicting for our purposes, since it errs toward facilitating new development rather than missing it.

+
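One simple way to check this claim is to tabulate how often the random forest over- versus under-predicts in the 2022 test set; the sketch below uses only the `rf_predictions` object built above.
+# Sketch: share of block groups where the 2022 prediction lands above, below,
+# or exactly on the observed permit count.
+rf_predictions %>%
+  st_drop_geometry() %>%
+  mutate(direction = case_when(
+    rf_predictions > permits_count ~ "overpredicted",
+    rf_predictions < permits_count ~ "underpredicted",
+    TRUE ~ "exact"
+  )) %>%
+  count(direction) %>%
+  mutate(share = n / sum(n))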
+
+Show the code +
nbins <- as.integer(sqrt(nrow(rf_predictions)))
+vline <- mean(rf_predictions$abs_error, na.rm = TRUE)
+
+ggplot(rf_predictions, aes(x = abs_error)) +
+  geom_histogram(bins = nbins) +
+  geom_vline(aes(xintercept = vline))
+
+
+

+
+
+
+
+

5.2 Generalizability

+
+
+Show the code +
tm_shape(acs19) +
+        tm_polygons(col = "Percent_Nonwhite", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
+
+
+

+
+
+Show the code +
rf_predictions <- rf_predictions %>%
+                      mutate(race_comp = case_when(
+                        percent_nonwhite >= .50 ~ "Majority Non-White",
+                        TRUE ~ "Majority White"
+                      ))
+
+ggplot(rf_predictions, aes(y = abs_error, color = race_comp)) +
+  geom_boxplot(fill = NA)
+
+
+

+
+
+

How does this generalize across neighborhoods?

+
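One way to probe this is to summarize the absolute error by neighborhood. The sketch below assumes the neighborhood name (`mapname`) is re-attached to the predictions via a spatial join against the `nbhoods` layer loaded earlier, since it was dropped before modeling.
+# Sketch: MAE by neighborhood for the 2022 random forest predictions; `largest = TRUE`
+# assigns each block group to the neighborhood it overlaps most.
+rf_predictions %>%
+  st_join(nbhoods, largest = TRUE) %>%
+  st_drop_geometry() %>%
+  group_by(mapname) %>%
+  summarize(mae = mean(abs_error, na.rm = TRUE),
+            n_block_groups = n()) %>%
+  arrange(desc(mae))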

How does this generalize across council districts?

+
+
+

5.3 Assessing Upzoning Needs

+

We can identify conflict between projected development and current zoning.

+

We look at parcels zoned industrial or single-family residential in areas that our model suggests face high development pressure in 2023:

+
+
+Show the code +
filtered_zoning <- zoning %>%
+                     filter(str_detect(CODE, "RS") | str_detect(CODE, "I"))
+
+
+tm_shape(filtered_zoning) +
+        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
+
+
+

+
+
+

We can attach our block-group-level development predictions to these parcels and then visualize them, highlighting those with the highest projected development pressure.

+
+
+Show the code +
filtered_zoning <- st_join(
+  filtered_zoning,
+  rf_predictions %>% select(rf_predictions)
+)
+
+tm_shape(filtered_zoning) +
+        tm_polygons(col = "rf_predictions", border.alpha = 0, colorNA = "lightgrey", palette = "viridis", style = "fisher") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
+
+
+

+
+
+
+
+Show the code +
tmap_mode('view')
+
+filtered_zoning %>%
+  filter(rf_predictions > 10) %>%
+tm_shape() +
+        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey",
+                    popup.vars = c('rf_predictions', 'CODE')) +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
+
+
+ +
+ +
+
+

Furthermore, we can identify properties with high potential for assemblage, which suggests capacity to accommodate high-density, multi-unit housing.

+
+
+Show the code +
nbs <- filtered_zoning %>% 
+  mutate(nb = st_contiguity(geometry))
+
+# Create edge list while handling cases with no neighbors
+edge_list <- tibble::tibble(id = 1:length(nbs$nb), nbs = nbs$nb) %>% 
+  tidyr::unnest(nbs) %>% 
+  filter(nbs != 0)
+
+# Create a graph with a node for each row in filtered_zoning
+g <- make_empty_graph(n = nrow(filtered_zoning))
+V(g)$name <- as.character(1:nrow(filtered_zoning))
+
+# Add edges if they exist (define `edges` up front so the check below never
+# references an undefined object when no parcel has a contiguous neighbor)
+edges <- matrix(numeric(0), ncol = 2)
+if (nrow(edge_list) > 0) {
+  edges <- as.matrix(edge_list)
+  g <- add_edges(g, c(t(edges)))
+}
+
+# Calculate the number of contiguous neighbors, handling nodes without neighbors
+n_contiguous <- sapply(V(g)$name, function(node) {
+  if (node %in% edges) {
+    length(neighborhood(g, order = 1, nodes = as.numeric(node))[[1]])  # includes the node itself
+  } else {
+    1  # Nodes without neighbors count as 1 (themselves)
+  }
+})
+
+filtered_zoning <- filtered_zoning %>%
+                    mutate(n_contig = n_contiguous)
+
+filtered_zoning %>%
+  st_drop_geometry() %>%
+  select(rf_predictions,
+         n_contig,
+         OBJECTID,
+         CODE) %>%
+  filter(rf_predictions > 10,
+         n_contig > 2) %>%
+  kablerize(caption = "Poorly-Zoned Properties with High Development Risk")
+
+
Poorly-Zoned Properties with High Development Risk
rf_predictions | n_contig | OBJECTID | CODE
27512.167103604I2
85821.5859761524ICMX
90312.9468361595RSA5
91621.5859781615ICMX
152211.2443742559RSA5
163121.5859732736IRMX
164321.5859762756ICMX
167021.5859772803ICMX
167121.5859732804IRMX
186534.3893033128IRMX
209912.9468363492RSA5
225111.4208043744IRMX
268512.9468364533RSA5
316810.5881735506RSA5
343921.5859766067ICMX
355412.9468366289RSA5
361621.5859736405RSA5
383612.9468376869ICMX
384634.3893036901ICMX
386512.9468366943RSA5
415529.6927037646ICMX
417811.2138337704IRMX
459312.9468368805RSA5
469723.0356339094RSA5
475216.0827359244IRMX
480010.5881739371ICMX
491021.5859739662RSA5
521135.68783310411ICMX
521235.68783310412RSA5
521335.68783310413ICMX
534518.21887310760IRMX
550139.00537311150ICMX
550635.68783311161RSA5
561611.24437311449RSA3
599611.00043412339I2
620611.00043312808RSA5
625811.00043312932ICMX
631216.08273613058ICMX
641223.99983313314IRMX
670322.58767313979I3
670511.00043313981RSA3
672013.26530314019RSA5
6793.211.00043814223I2
679422.58767314224ICMX
680822.58767314257I2
685511.00043314373RSA5
686411.00043314401RSA5
686511.00043314402RSA5
696513.26530314649ICMX
701412.94683814748ICMX
719211.24437315167RSA2
744639.00537315720I2
748711.00043615820RSA5
763713.26530316181RSA5
788539.00537316719RSA5
810515.75340317170ICMX
813213.26530317217ICMX
822015.48503317410RSA5
854313.56143318033RSD3
907513.56143319078RSA3
936211.24437319593RSA3
960721.58597420075ICMX
980212.16710320444RSA5
985413.56143420536RSA2
1037713.26530321529ICMX
1065113.56143322004RSD1
1287729.69270425778RSA5
1292911.24437425868RSA1
1316413.56143326249RSD1
1349410.85073326759RSA5
1359911.24437326935RSA3
1381711.24437327284RSA2
1389211.24437327394RSD3
1411013.26530427776I2
1415816.06680327871IRMX
14719.221.585972828949I2
14719.312.946832828949I2
1472012.94683628950RSA5
+ + +
+
+Show the code +
tmap_mode('view')
+
+filtered_zoning %>%
+  select(rf_predictions,
+         n_contig,
+         OBJECTID,
+         CODE) %>%
+  filter(rf_predictions > 10,
+         n_contig > 2) %>%
+tm_shape() +
+        tm_polygons(col = "rf_predictions", border.alpha = 0, colorNA = "lightgrey", palette = "viridis", style = "fisher",
+                    popup.vars = c('rf_predictions', 'n_contig', 'CODE'), alpha = 0.5) +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
+
+
+ +
+ +
+
+
+
+
+

6 Conclusion

+
+
+

7 Appendices

+
+ +
+ + +
+
\ No newline at end of file
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..352077f
--- /dev/null
+++ b/index.html
@@ -0,0 +1,9670 @@