diff --git a/.nojekyll b/.nojekyll index 7565821..b083c9a 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -8cb23537 \ No newline at end of file +3293c4dc \ No newline at end of file diff --git a/assets/permits_animation.gif b/assets/permits_animation.gif index 0f53bd7..b2f6a46 100644 Binary files a/assets/permits_animation.gif and b/assets/permits_animation.gif differ diff --git a/final.html b/final.html index 99f12e1..3e4c12a 100644 --- a/final.html +++ b/final.html @@ -3302,20 +3302,18 @@ document.body.style.height = "100%"; document.documentElement.style.width = "100%"; document.documentElement.style.height = "100%"; - if (cel) { - cel.style.position = "absolute"; - var pad = unpackPadding(sizing.padding); - cel.style.top = pad.top + "px"; - cel.style.right = pad.right + "px"; - cel.style.bottom = pad.bottom + "px"; - cel.style.left = pad.left + "px"; - el.style.width = "100%"; - el.style.height = "100%"; - } + cel.style.position = "absolute"; + var pad = unpackPadding(sizing.padding); + cel.style.top = pad.top + "px"; + cel.style.right = pad.right + "px"; + cel.style.bottom = pad.bottom + "px"; + cel.style.left = pad.left + "px"; + el.style.width = "100%"; + el.style.height = "100%"; return { - getWidth: function() { return cel.offsetWidth; }, - getHeight: function() { return cel.offsetHeight; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } else { @@ -3323,8 +3321,8 @@ el.style.height = px(sizing.height); return { - getWidth: function() { return el.offsetWidth; }, - getHeight: function() { return el.offsetHeight; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } } @@ -3548,8 +3546,8 @@ elementData(el, "initialized", true); if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); + var rect = 
el.getBoundingClientRect(); + var result = bindingDef.initialize(el, rect.width, rect.height); elementData(el, "init_result", result); } } @@ -3591,29 +3589,30 @@ forEach(matches, function(el) { var sizeObj = initSizing(el, binding); + var getSize = function(el) { + if (sizeObj) { + return {w: sizeObj.getWidth(), h: sizeObj.getHeight()} + } else { + var rect = el.getBoundingClientRect(); + return {w: rect.width, h: rect.height} + } + }; + if (hasClass(el, "html-widget-static-bound")) return; el.className = el.className + " html-widget-static-bound"; var initResult; if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); + var size = getSize(el); + initResult = binding.initialize(el, size.w, size.h); elementData(el, "init_result", initResult); } if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; + var lastSize = getSize(el); var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; + var size = getSize(el); if (size.w === 0 && size.h === 0) return; if (size.w === lastSize.w && size.h === lastSize.h) @@ -3915,7 +3914,6 @@ return result; } })(); - +
+
@@ -9415,23 +9409,28 @@
Show the code -
corr_vars <- c("total_pop",
-               "med_inc",
-               "percent_nonwhite",
-               "percent_renters",
-               "rent_burden",
-               "ext_rent_burden")
-
-corr_dat <- permits_bg %>% select(all_of(corr_vars)) %>% select(where(is.numeric)) %>% st_drop_geometry() %>% unique() %>% na.omit()
-
-corr <- round(cor(corr_dat), 2)
-p.mat <- cor_pmat(corr_dat)
-
-ggcorrplot(corr, p.mat = p.mat, hc.order = TRUE,
-    type = "full", insig = "blank", lab = TRUE, colors = c(palette[2], "white", palette[3]))
+
corr_vars <- c("total_pop",
+               "med_inc",
+               "percent_nonwhite",
+               "percent_renters",
+               "rent_burden",
+               "ext_rent_burden")
+
+corr_dat <- permits_bg %>% select(all_of(corr_vars), permits_count) %>% select(where(is.numeric)) %>% st_drop_geometry() %>% unique() %>% na.omit()
+
+corr <- round(cor(corr_dat), 2)
+p.mat <- cor_pmat(corr_dat)
+
+ggcorrplot(corr, p.mat = p.mat, hc.order = TRUE,
+    type = "full", insig = "blank", lab = TRUE, colors = c(palette[2], "white", palette[3])) +
+  annotate(
+  geom = "rect",
+  xmin = .5, xmax = 7.5, ymin = 4.5, ymax = 5.5,
+  fill = "transparent", color = "red", alpha = 0.5
+)
-

+

@@ -9441,57 +9440,57 @@
Show the code -
lisa <- permits_bg %>% 
-  filter(year == 2023) %>%
-  mutate(nb = st_contiguity(geometry),
-                         wt = st_weights(nb),
-                         permits_lag = st_lag(permits_count, nb, wt),
-          moran = local_moran(permits_count, nb, wt)) %>% 
-  tidyr::unnest(moran) %>% 
-  mutate(pysal = ifelse(p_folded_sim <= 0.1, as.character(pysal), NA),
-         hotspot = case_when(
-           pysal == "High-High" ~ "Signficant",
-           TRUE ~ "Not Signficant"
-         ))
-
-# 
-# palette <- c("High-High" = "#B20016", 
-#              "Low-Low" = "#1C4769", 
-#              "Low-High" = "#24975E", 
-#              "High-Low" = "#EACA97")
-
-morans_i <- tm_shape(lisa) +
-  tm_polygons(col = "ii", border.alpha = 0, style = "jenks", palette = 'viridis')
-
-p_value <- tm_shape(lisa) +
-  tm_polygons(col = "p_ii", border.alpha = 0, style = "jenks", palette = '-viridis')
-
-sig_hotspots <- tm_shape(lisa) +
-  tm_polygons(col = "hotspot", border.alpha = 0, style = "cat", palette = 'viridis', textNA = "Not a Hotspot")
-
-tmap_arrange(morans_i, p_value, sig_hotspots, ncol = 3)
+
lisa <- permits_bg %>% 
+  filter(year == 2023) %>%
+  mutate(nb = st_contiguity(geometry),
+                         wt = st_weights(nb),
+                         permits_lag = st_lag(permits_count, nb, wt),
+          moran = local_moran(permits_count, nb, wt)) %>% 
+  tidyr::unnest(moran) %>% 
+  mutate(pysal = ifelse(p_folded_sim <= 0.1, as.character(pysal), NA),
+         hotspot = case_when(
+           pysal == "High-High" ~ "Significant",
+           TRUE ~ "Not Significant"
+         ))
+
+# 
+# palette <- c("High-High" = "#B20016", 
+#              "Low-Low" = "#1C4769", 
+#              "Low-High" = "#24975E", 
+#              "High-Low" = "#EACA97")
+
+morans_i <- tm_shape(lisa) +
+  tm_polygons(col = "ii", border.alpha = 0, style = "jenks", palette = mono_5_green)
+
+p_value <- tm_shape(lisa) +
+  tm_polygons(col = "p_ii", border.alpha = 0, style = "jenks", palette = mono_5_green)
+
+sig_hotspots <- tm_shape(lisa) +
+  tm_polygons(col = "hotspot", border.alpha = 0, style = "cat", palette = c(palette[2], palette[3]), textNA = "Not a Hotspot")
+
+tmap_arrange(morans_i, p_value, sig_hotspots, ncol = 3)
-

+

Emerging hotspots

Show the code -
# stc <- as_spacetime(permits_bg,
-#                  .loc_col = "geoid10",
-#                  .time_col = "year")
-# 
-# # conduct EHSA
-# ehsa <- emerging_hotspot_analysis(
-#   x = stc, 
-#   .var = "permits_count", 
-#   k = 1, 
-#   nsim = 5
-# )
-# 
-# count(ehsa, classification)
+
# stc <- as_spacetime(permits_bg %>% select(permits_count, geoid10, year) %>% na.omit(),
+#                  .loc_col = "geoid10",
+#                  .time_col = "year")
+# 
+# # conduct EHSA
+# ehsa <- emerging_hotspot_analysis(
+#   x = stc,
+#   .var = "permits_count",
+#   k = 1,
+#   nsim = 5
+# )
+# 
+# count(ehsa, classification)
@@ -9500,21 +9499,21 @@

Show the code -
permits_bg_long <- permits_bg %>%
-                    filter(!year %in% c(2024)) %>%
-                    st_drop_geometry() %>%
-                    pivot_longer(
-                      cols = c(starts_with("lag"), dist_to_2022),
-                      names_to = "Variable",
-                      values_to = "Value"
-                    )
-
-
-ggscatter(permits_bg_long, x = "permits_count", y = "Value", facet.by = "Variable",
-   add = "reg.line",
-   add.params = list(color = palette[3], fill = palette[5]),
-   conf.int = TRUE
-   ) + stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01)
+
permits_bg_long <- permits_bg %>%
+                    filter(!year %in% c(2024)) %>%
+                    st_drop_geometry() %>%
+                    pivot_longer(
+                      cols = c(starts_with("lag"), dist_to_2022),
+                      names_to = "Variable",
+                      values_to = "Value"
+                    )
+
+
+ggscatter(permits_bg_long, x = "permits_count", y = "Value", facet.by = "Variable",
+   add = "reg.line",
+   add.params = list(color = palette[3], fill = palette[5]),
+   conf.int = TRUE
+   ) + stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01)

@@ -9536,94 +9535,147 @@

Show the code -
permits_train <- filter(permits_bg %>% select(-c(mapname, geoid10)), year < 2022)
-permits_test <- filter(permits_bg %>% select(-c(mapname, geoid10)), year == 2022)
-permits_validate <- filter(permits_bg %>% select(-c(mapname, geoid10)), year == 2023)
-permits_predict <- filter(permits_bg %>% select(-c(mapname, geoid10)), year == 2024)
-
-reg <- lm(permits_count ~ ., data = st_drop_geometry(permits_train))
-
-predictions <- predict(reg, permits_test)
-predictions <- cbind(permits_test, predictions)
-
-predictions <- predictions %>%
-                  mutate(abs_error = abs(permits_count - predictions),
-                         pct_error = abs_error / permits_count)
-
-ggplot(predictions, aes(x = permits_count, y = predictions)) +
-  geom_point() +
-  labs(title = "Predicted vs. Actual Permits",
-       subtitle = "2022") +
-  geom_smooth(method = "lm", se = FALSE)
+
ggplot(ols_preds, aes(x = permits_count, y = ols_preds)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
-

+

Show the code -
mae <- paste0("MAE: ", round(mean(predictions$abs_error, na.rm = TRUE), 2))
+
ggplot(ols_preds, aes(x = abs_error)) +
+  geom_histogram() +
+  labs(title = "Distribution of Absolute Error per Block Group",
+       subtitle = "OLS, 2022")
+
+
+

+
+
+Show the code +
ols_mae <- paste0("MAE: ", round(mean(ols_preds$abs_error, na.rm = TRUE), 2))
 
-tm_shape(predictions) +
-        tm_polygons(col = "abs_error", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+ols_preds_map <- tm_shape(ols_preds) +
+        tm_polygons(col = "ols_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
   tm_shape(broad_and_market) +
   tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+ tm_layout(frame = FALSE) + +ols_error_map <- tm_shape(ols_preds) + + tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") + + tm_shape(broad_and_market) + + tm_lines(col = "lightgrey") + + tm_layout(frame = FALSE) + +tmap_arrange(ols_preds_map, ols_error_map)

-

+

We find that our OLS model has an MAE of only MAE: 2.66–not bad for such a simple model! Still, it struggles most in the areas where we most need it to succeed, so we will try to introduce better variables and apply a more complex model to improve our predictions.

-
-

4.2 Random Forest Regression

+
+

4.2 Random Forest Regression: Testing

+

We train and test up to 2022–we use this for model tuning and feature engineering.

Show the code -
rf <- randomForest(permits_count ~ ., 
-                   data = st_drop_geometry(permits_train),
-                   importance = TRUE, 
-                   na.action = na.omit)
-
-rf_predictions <- predict(rf, permits_test)
-rf_predictions <- cbind(permits_test, rf_predictions)
-rf_predictions <- rf_predictions %>%
-                  mutate(abs_error = abs(permits_count - rf_predictions),
-                         pct_error = abs_error / (permits_count + 0.0001))
-
-tm_shape(rf_predictions) +
-        tm_polygons(col = "rf_predictions", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+
test_preds_map <- tm_shape(rf_test_preds) +
+        tm_polygons(col = "rf_test_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+test_error_map <- tm_shape(rf_test_preds) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+tmap_arrange(test_preds_map, test_error_map)
-

+

Show the code -
ggplot(rf_predictions, aes(x = permits_count, y = rf_predictions)) +
-  geom_point() +
-  labs(title = "Predicted vs. Actual Permits",
-       subtitle = "2022") +
-  geom_smooth(method = "lm", se = FALSE)
+
ggplot(rf_test_preds, aes(x = abs_error)) +
+  geom_histogram() +
+  labs(title = "Distribution of Absolute Error per Block Group",
+       subtitle = "Random Forest, 2022")
-

+

Show the code -
rf_mae <- paste0("MAE: ", round(mean(rf_predictions$abs_error, na.rm = TRUE), 2))
-
-tm_shape(rf_predictions) +
-        tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+
ggplot(rf_test_preds, aes(x = permits_count, y = rf_test_preds)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
-

+

+
+Show the code +
rf_test_mae <- paste0("MAE: ", round(mean(rf_test_preds$abs_error, na.rm = TRUE), 2))
+
+
+
+
+

4.3 Random Forest Regression: Validation

+

Having settled on our model features and tuning, we now validate on 2023 data.

+
+
+Show the code +
val_preds_map <- tm_shape(rf_val_preds) +
+        tm_polygons(col = "rf_val_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+val_error_map <- tm_shape(rf_val_preds) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+tmap_arrange(val_preds_map, val_error_map)
+
+
+

+
+
+Show the code +
ggplot(rf_val_preds, aes(x = abs_error)) +
+  geom_histogram() +
+  labs(title = "Distribution of Absolute Error per Block Group",
+       subtitle = "Random Forest, 2023")
+
+
+

+
+
+Show the code +
ggplot(rf_val_preds, aes(x = permits_count, y = rf_val_preds)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2023") +
+  geom_smooth(method = "lm", se = FALSE)
+
+
+

+
+
+Show the code +
rf_val_mae <- paste0("MAE: ", round(mean(rf_val_preds$abs_error, na.rm = TRUE), 2))
+
@@ -9635,23 +9687,16 @@

Show the code -
nbins <- as.integer(sqrt(nrow(rf_predictions)))
-vline <- mean(rf_predictions$abs_error, na.rm = TRUE)
-
-ggplot(rf_predictions, aes(x = abs_error)) +
-  geom_histogram(bins = nbins) +
-  geom_vline(aes(xintercept = vline))
+
nbins <- as.integer(sqrt(nrow(rf_val_preds)))
+vline <- mean(rf_val_preds$abs_error, na.rm = TRUE)
+
+ggplot(rf_val_preds, aes(x = abs_error)) +
+  geom_histogram(bins = nbins) +
+  geom_vline(aes(xintercept = vline))
-

+

-
-Show the code -
hmm <- permits_bg %>%
-  st_drop_geometry() %>%
-  group_by(year) %>%
-  summarize_all(.funs = list(~sum(is.na(.)))) # Check NA for all columns
-
@@ -9659,78 +9704,78 @@

Show the code -
rf_predictions <- rf_predictions %>%
-                      mutate(race_comp = case_when(
-                        percent_nonwhite >= .50 ~ "Majority Non-White",
-                        TRUE ~ "Majority White"
-                      ))
-
-ggplot(rf_predictions, aes(y = abs_error, color = race_comp)) +
-  geom_boxplot(fill = NA)
+
rf_val_preds <- rf_val_preds %>%
+                      mutate(race_comp = case_when(
+                        percent_nonwhite >= .50 ~ "Majority Non-White",
+                        TRUE ~ "Majority White"
+                      ))
+
+ggplot(rf_val_preds, aes(y = abs_error, color = race_comp)) +
+  geom_boxplot(fill = NA)
-

+

We find that error is not related to affordability and actually trends downward with percent nonwhite. (This is probably because there is less total development happening there in majority-minority neighborhoods to begin with, so the magnitude of error is less, even though proportionally it might be more.) Error increases slightly with total pop. This makes sense–more people –> more development.

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = rent_burden)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = rent_burden)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = total_pop)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = total_pop)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = med_inc)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = med_inc)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

How does this generalize across council districts? Don’t forget to refactor

Show the code -
suppressMessages(
-  ggplot(rf_predictions, aes(x = reorder(district, abs_error, FUN = mean), y = abs_error)) +
-    geom_boxplot(fill = NA) +
-    labs(title = "MAE by Council District",
-         y = "Mean Absolute Error",
-         x = "Council District") +
-    theme_minimal() +
-    theme()
-)
+
suppressMessages(
+  ggplot(rf_val_preds, aes(x = reorder(district, abs_error, FUN = mean), y = abs_error)) +
+    geom_boxplot(fill = NA) +
+    labs(title = "MAE by Council District",
+         y = "Mean Absolute Error",
+         x = "Council District") +
+    theme_minimal() +
+    theme()
+)
-

+

@@ -9741,17 +9786,17 @@

Show the code -
filtered_zoning <- zoning %>%
-                     filter(str_detect(CODE, "RS") | str_detect(CODE, "I"),
-                            CODE != "I2",
-                            !str_detect(CODE, "SP"))
-
-
-tm_shape(filtered_zoning) +
-        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
filtered_zoning <- zoning %>%
+                     filter(str_detect(CODE, "RS") | str_detect(CODE, "I"),
+                            CODE != "I2",
+                            !str_detect(CODE, "SP"))
+
+
+tm_shape(filtered_zoning) +
+        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)

@@ -9761,85 +9806,85 @@

Show the code -
filtered_zoning <- st_join(
-  filtered_zoning,
-  rf_predictions %>% select(rf_predictions)
-)
-
-tm_shape(filtered_zoning) +
-        tm_polygons(col = "rf_predictions", border.alpha = 0, colorNA = "lightgrey", palette = mono_5_orange, style = "fisher") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
filtered_zoning <- st_join(
+  filtered_zoning,
+  rf_val_preds %>% select(rf_val_preds)
+)
+
+tm_shape(filtered_zoning) +
+        tm_polygons(col = "rf_val_preds", border.alpha = 0, colorNA = "lightgrey", palette = mono_5_orange, style = "fisher") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
-

+

Show the code -
tmap_mode('view')
-
-filtered_zoning %>%
-  filter(rf_predictions > 10) %>%
-tm_shape() +
-        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey",
-                    popup.vars = c('rf_predictions', 'CODE')) +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
tmap_mode('view')
+
+filtered_zoning %>%
+  filter(rf_val_preds > 10) %>%
+tm_shape() +
+        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey",
+                    popup.vars = c('rf_val_preds', 'CODE')) +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
-
- +
+

Furthermore, we can identify properties with high potential for assemblage, which suggests the ability to accommodate high-density, multi-unit housing.

Show the code -
nbs <- filtered_zoning %>% 
-  mutate(nb = st_contiguity(geometry))
-
-# Create edge list while handling cases with no neighbors
-edge_list <- tibble::tibble(id = 1:length(nbs$nb), nbs = nbs$nb) %>% 
-  tidyr::unnest(nbs) %>% 
-  filter(nbs != 0)
-
-# Create a graph with a node for each row in filtered_zoning
-g <- make_empty_graph(n = nrow(filtered_zoning))
-V(g)$name <- as.character(1:nrow(filtered_zoning))
-
-# Add edges if they exist
-if (nrow(edge_list) > 0) {
-  edges <- as.matrix(edge_list)
-  g <- add_edges(g, c(t(edges)))
-}
-
-# Calculate the number of contiguous neighbors, handling nodes without neighbors
-n_contiguous <- sapply(V(g)$name, function(node) {
-  if (node %in% edges) {
-    length(neighborhood(g, order = 1, nodes = as.numeric(node))[[1]])
-  } else {
-    1  # Nodes without neighbors count as 1 (themselves)
-  }
-})
-
-filtered_zoning <- filtered_zoning %>%
-                    mutate(n_contig = n_contiguous)
-
-filtered_zoning %>%
-  st_drop_geometry() %>%
-  select(rf_predictions,
-         n_contig,
-         OBJECTID,
-         CODE) %>%
-  filter(rf_predictions > 10,
-         n_contig > 2) %>%
-  arrange(desc(rf_predictions)) %>%
-  kablerize(caption = "Poorly-Zoned Properties with High Development Risk")
+
nbs <- filtered_zoning %>% 
+  mutate(nb = st_contiguity(geometry))
+
+# Create edge list while handling cases with no neighbors
+edge_list <- tibble::tibble(id = 1:length(nbs$nb), nbs = nbs$nb) %>% 
+  tidyr::unnest(nbs) %>% 
+  filter(nbs != 0)
+
+# Create a graph with a node for each row in filtered_zoning
+g <- make_empty_graph(n = nrow(filtered_zoning))
+V(g)$name <- as.character(1:nrow(filtered_zoning))
+
+# Add edges if they exist
+if (nrow(edge_list) > 0) {
+  edges <- as.matrix(edge_list)
+  g <- add_edges(g, c(t(edges)))
+}
+
+# Calculate the number of contiguous neighbors, handling nodes without neighbors
+n_contiguous <- sapply(V(g)$name, function(node) {
+  if (node %in% edges) {
+    length(neighborhood(g, order = 1, nodes = as.numeric(node))[[1]])
+  } else {
+    1  # Nodes without neighbors count as 1 (themselves)
+  }
+})
+
+filtered_zoning <- filtered_zoning %>%
+                    mutate(n_contig = n_contiguous)
+
+filtered_zoning %>%
+  st_drop_geometry() %>%
+  select(rf_val_preds,
+         n_contig,
+         OBJECTID,
+         CODE) %>%
+  filter(rf_val_preds > 10,
+         n_contig > 2) %>%
+  arrange(desc(rf_val_preds)) %>%
+  kablerize(caption = "Poorly-Zoned Properties with High Development Risk")
@@ -9847,7 +9892,7 @@

- + @@ -9855,368 +9900,270 @@

- - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - + + + + + + + + - + - + - + - - - + + + - + - + - + - + - + - - - - - - - - - + + - + - - - - - - - - - - - - - - - - + + - + - - + + - - - - - - - - + - - - - - - - - - - - - - - - - + + - + - - - - - - - - - + + - + - - + + - + - - + + - + - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - + - + - + - - + + - - + + - - - - - - - - - + + - + - - - - - - - - - + + - + - - + + - - + + - - - - - + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - + + + + + - - - - - - - - + - + - + - + - + - + - + - + - + - - - - - - - - - + + - - + + - - - - - + + + + +
rf_predictionsrf_val_preds n_contig OBJECTID CODE
751744.2962086827.06613 316717RSA51615ICMX
495741.61663154827.06613 310410ICMX2736IRMX
495841.61663158727.06613 310411RSA52804IRMX
495941.61663342027.06613 310412ICMX6405RSA5
524541.61663466727.06613 3111609661 RSA5
916927.06613420073ICMX
176834.2469022.24860 3 3128 IRMX
364034.2469022.24860 3 6901 ICMX
446029.67577
751721.08143 3909316717 RSA5
393428.7265720.84390 3 7646 ICMX
1232628.7265720.84390 4 25776 RSA5
1357824.43513327869IRMX
86823.84580495720.67827 3161510410 ICMX
154823.8458032736IRMX
158723.8458032804IRMX
342023.84580495820.67827 3640510411 RSA5
466723.84580495920.67827 39661RSA5
916923.8458042007310412 ICMX
508819.26747310759IRMX
772618.15027317168ICMX
783318.11450524520.67827 31740811160 RSA5
664516.99250314648ICMX
728016.99250446017.31087 3161799093 RSA5
991216.99250772615.16207 32152717168 ICMX
395715.647831357815.12210 3770427869 IRMX
596414.50423312931ICMX
639614.50423313980RSA3
654014.50423314372RSA5
655014.50423508815.00973 314401RSA5
213813.645004374410759 IRMX
451213.2790314.95163 5 9243 IRMX
601413.2790314.95163 6 13057 ICMX
577613.26503304112.82333 312473RSA55568ICMX
825213.26503318254RSA3
1284013.26503984212.82333 32662721369 RSA5
620012.71553413532ICMX
669112.43360984312.82333 31474721370 ICMX
414612.33040984512.82333 38265IRMX21372RSA5
510812.33040410795IRMX783312.25843317408RSA5
431812.08010395711.49060 38705RSD37704IRMX
1327012.08010664511.22550 327311RSD114648ICMX
13750.112.08010728011.22550 328226RSA316179RSA5
1334010.30770991211.22550 327419RSD321527ICMX
1403310.30770328807RSD3213810.8825343744IRMX
1403410.30770328808RSD1
814310.2576010.71940 3 18031 RSD3
865610.2576010.71940 3 19076 RSA3
940910.2576010.71940 4 20534 RSA2
1017510.2576010.71940 3 22002 RSD1
1260510.2576010.71940 3 26247 RSD1
570610.09430312329ICMX
300710.07567414610.39053 35506RSA58265IRMX
455810.0756739370ICMX510810.39053410795IRMX
@@ -10225,49 +10172,45 @@

Show the code -
tmap_mode('view')
-
-filtered_zoning %>%
-  select(rf_predictions,
-         n_contig,
-         OBJECTID,
-         CODE) %>%
-  filter(rf_predictions > 10,
-         n_contig > 2) %>%
-tm_shape() +
-        tm_polygons(col = "rf_predictions", border.alpha = 0, colorNA = "lightgrey", palette = "viridis", style = "fisher",
-                    popup.vars = c('rf_predictions', 'n_contig', 'CODE'), alpha = 0.5) +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
tmap_mode('view')
+
+filtered_zoning %>%
+  select(rf_val_preds,
+         n_contig,
+         OBJECTID,
+         CODE) %>%
+  filter(rf_val_preds > 10,
+         n_contig > 2) %>%
+tm_shape() +
+        tm_polygons(col = "rf_val_preds", border.alpha = 0, colorNA = "lightgrey", palette = "viridis", style = "fisher",
+                    popup.vars = c('rf_val_preds', 'n_contig', 'CODE'), alpha = 0.5) +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
-
- +
+

-
-

5.4 2024 Predictions

-

Just for shits and giggles, throw in 2024 predictions. (Can use data from 2023.)

+
+

5.4 Random Forest Regression: Projection

+

Just for shits and giggles, we can now predict for 2024.

Show the code -
rf_predictions_2024 <- predict(rf, permits_predict)
-rf_predictions_2024 <- cbind(permits_predict, rf_predictions_2024)
-
-
-tm_shape(rf_predictions_2024) +
-        tm_polygons(col = "rf_predictions_2024", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+
tmap_mode('plot')
+
+tm_shape(rf_proj_preds) +
+        tm_polygons(col = "rf_proj_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
- -
- +

diff --git a/index.html b/index.html index 99f12e1..3e4c12a 100644 --- a/index.html +++ b/index.html @@ -3302,20 +3302,18 @@ document.body.style.height = "100%"; document.documentElement.style.width = "100%"; document.documentElement.style.height = "100%"; - if (cel) { - cel.style.position = "absolute"; - var pad = unpackPadding(sizing.padding); - cel.style.top = pad.top + "px"; - cel.style.right = pad.right + "px"; - cel.style.bottom = pad.bottom + "px"; - cel.style.left = pad.left + "px"; - el.style.width = "100%"; - el.style.height = "100%"; - } + cel.style.position = "absolute"; + var pad = unpackPadding(sizing.padding); + cel.style.top = pad.top + "px"; + cel.style.right = pad.right + "px"; + cel.style.bottom = pad.bottom + "px"; + cel.style.left = pad.left + "px"; + el.style.width = "100%"; + el.style.height = "100%"; return { - getWidth: function() { return cel.offsetWidth; }, - getHeight: function() { return cel.offsetHeight; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } else { @@ -3323,8 +3321,8 @@ el.style.height = px(sizing.height); return { - getWidth: function() { return el.offsetWidth; }, - getHeight: function() { return el.offsetHeight; } + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } }; } } @@ -3548,8 +3546,8 @@ elementData(el, "initialized", true); if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); + var rect = el.getBoundingClientRect(); + var result = bindingDef.initialize(el, rect.width, rect.height); elementData(el, "init_result", result); } } @@ -3591,29 +3589,30 @@ forEach(matches, function(el) { var sizeObj = initSizing(el, binding); + var getSize = function(el) { + if (sizeObj) { + return {w: sizeObj.getWidth(), h: sizeObj.getHeight()} + } else { + var rect = el.getBoundingClientRect(); + 
return {w: rect.width, h: rect.height} + } + }; + if (hasClass(el, "html-widget-static-bound")) return; el.className = el.className + " html-widget-static-bound"; var initResult; if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); + var size = getSize(el); + initResult = binding.initialize(el, size.w, size.h); elementData(el, "init_result", initResult); } if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; + var lastSize = getSize(el); var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; + var size = getSize(el); if (size.w === 0 && size.h === 0) return; if (size.w === lastSize.w && size.h === lastSize.h) @@ -3915,7 +3914,6 @@ return result; } })(); - +
+
@@ -9415,23 +9409,28 @@
Show the code -
corr_vars <- c("total_pop",
-               "med_inc",
-               "percent_nonwhite",
-               "percent_renters",
-               "rent_burden",
-               "ext_rent_burden")
-
-corr_dat <- permits_bg %>% select(all_of(corr_vars)) %>% select(where(is.numeric)) %>% st_drop_geometry() %>% unique() %>% na.omit()
-
-corr <- round(cor(corr_dat), 2)
-p.mat <- cor_pmat(corr_dat)
-
-ggcorrplot(corr, p.mat = p.mat, hc.order = TRUE,
-    type = "full", insig = "blank", lab = TRUE, colors = c(palette[2], "white", palette[3]))
+
corr_vars <- c("total_pop",
+               "med_inc",
+               "percent_nonwhite",
+               "percent_renters",
+               "rent_burden",
+               "ext_rent_burden")
+
+corr_dat <- permits_bg %>% select(all_of(corr_vars), permits_count) %>% select(where(is.numeric)) %>% st_drop_geometry() %>% unique() %>% na.omit()
+
+corr <- round(cor(corr_dat), 2)
+p.mat <- cor_pmat(corr_dat)
+
+ggcorrplot(corr, p.mat = p.mat, hc.order = TRUE,
+    type = "full", insig = "blank", lab = TRUE, colors = c(palette[2], "white", palette[3])) +
+  annotate(
+  geom = "rect",
+  xmin = .5, xmax = 7.5, ymin = 4.5, ymax = 5.5,
+  fill = "transparent", color = "red", alpha = 0.5
+)
-

+

@@ -9441,57 +9440,57 @@
Show the code -
lisa <- permits_bg %>% 
-  filter(year == 2023) %>%
-  mutate(nb = st_contiguity(geometry),
-                         wt = st_weights(nb),
-                         permits_lag = st_lag(permits_count, nb, wt),
-          moran = local_moran(permits_count, nb, wt)) %>% 
-  tidyr::unnest(moran) %>% 
-  mutate(pysal = ifelse(p_folded_sim <= 0.1, as.character(pysal), NA),
-         hotspot = case_when(
-           pysal == "High-High" ~ "Signficant",
-           TRUE ~ "Not Signficant"
-         ))
-
-# 
-# palette <- c("High-High" = "#B20016", 
-#              "Low-Low" = "#1C4769", 
-#              "Low-High" = "#24975E", 
-#              "High-Low" = "#EACA97")
-
-morans_i <- tm_shape(lisa) +
-  tm_polygons(col = "ii", border.alpha = 0, style = "jenks", palette = 'viridis')
-
-p_value <- tm_shape(lisa) +
-  tm_polygons(col = "p_ii", border.alpha = 0, style = "jenks", palette = '-viridis')
-
-sig_hotspots <- tm_shape(lisa) +
-  tm_polygons(col = "hotspot", border.alpha = 0, style = "cat", palette = 'viridis', textNA = "Not a Hotspot")
-
-tmap_arrange(morans_i, p_value, sig_hotspots, ncol = 3)
+
lisa <- permits_bg %>% 
+  filter(year == 2023) %>%
+  mutate(nb = st_contiguity(geometry),
+                         wt = st_weights(nb),
+                         permits_lag = st_lag(permits_count, nb, wt),
+          moran = local_moran(permits_count, nb, wt)) %>% 
+  tidyr::unnest(moran) %>% 
+  mutate(pysal = ifelse(p_folded_sim <= 0.1, as.character(pysal), NA),
+         hotspot = case_when(
+           pysal == "High-High" ~ "Signficant",
+           TRUE ~ "Not Signficant"
+         ))
+
+# 
+# palette <- c("High-High" = "#B20016", 
+#              "Low-Low" = "#1C4769", 
+#              "Low-High" = "#24975E", 
+#              "High-Low" = "#EACA97")
+
+morans_i <- tm_shape(lisa) +
+  tm_polygons(col = "ii", border.alpha = 0, style = "jenks", palette = mono_5_green)
+
+p_value <- tm_shape(lisa) +
+  tm_polygons(col = "p_ii", border.alpha = 0, style = "jenks", palette = mono_5_green)
+
+sig_hotspots <- tm_shape(lisa) +
+  tm_polygons(col = "hotspot", border.alpha = 0, style = "cat", palette = c(palette[2], palette[3]), textNA = "Not a Hotspot")
+
+tmap_arrange(morans_i, p_value, sig_hotspots, ncol = 3)
-

+

Emergeging hotspots

Show the code -
# stc <- as_spacetime(permits_bg,
-#                  .loc_col = "geoid10",
-#                  .time_col = "year")
-# 
-# # conduct EHSA
-# ehsa <- emerging_hotspot_analysis(
-#   x = stc, 
-#   .var = "permits_count", 
-#   k = 1, 
-#   nsim = 5
-# )
-# 
-# count(ehsa, classification)
+
# stc <- as_spacetime(permits_bg %>% select(permits_count, geoid10, year) %>% na.omit(),
+#                  .loc_col = "geoid10",
+#                  .time_col = "year")
+# 
+# # conduct EHSA
+# ehsa <- emerging_hotspot_analysis(
+#   x = stc,
+#   .var = "permits_count",
+#   k = 1,
+#   nsim = 5
+# )
+# 
+# count(ehsa, classification)
@@ -9500,21 +9499,21 @@

Show the code -
permits_bg_long <- permits_bg %>%
-                    filter(!year %in% c(2024)) %>%
-                    st_drop_geometry() %>%
-                    pivot_longer(
-                      cols = c(starts_with("lag"), dist_to_2022),
-                      names_to = "Variable",
-                      values_to = "Value"
-                    )
-
-
-ggscatter(permits_bg_long, x = "permits_count", y = "Value", facet.by = "Variable",
-   add = "reg.line",
-   add.params = list(color = palette[3], fill = palette[5]),
-   conf.int = TRUE
-   ) + stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01)
+
permits_bg_long <- permits_bg %>%
+                    filter(!year %in% c(2024)) %>%
+                    st_drop_geometry() %>%
+                    pivot_longer(
+                      cols = c(starts_with("lag"), dist_to_2022),
+                      names_to = "Variable",
+                      values_to = "Value"
+                    )
+
+
+ggscatter(permits_bg_long, x = "permits_count", y = "Value", facet.by = "Variable",
+   add = "reg.line",
+   add.params = list(color = palette[3], fill = palette[5]),
+   conf.int = TRUE
+   ) + stat_cor(method = "pearson", p.accuracy = 0.001, r.accuracy = 0.01)

@@ -9536,94 +9535,147 @@

Show the code -
permits_train <- filter(permits_bg %>% select(-c(mapname, geoid10)), year < 2022)
-permits_test <- filter(permits_bg %>% select(-c(mapname, geoid10)), year == 2022)
-permits_validate <- filter(permits_bg %>% select(-c(mapname, geoid10)), year == 2023)
-permits_predict <- filter(permits_bg %>% select(-c(mapname, geoid10)), year == 2024)
-
-reg <- lm(permits_count ~ ., data = st_drop_geometry(permits_train))
-
-predictions <- predict(reg, permits_test)
-predictions <- cbind(permits_test, predictions)
-
-predictions <- predictions %>%
-                  mutate(abs_error = abs(permits_count - predictions),
-                         pct_error = abs_error / permits_count)
-
-ggplot(predictions, aes(x = permits_count, y = predictions)) +
-  geom_point() +
-  labs(title = "Predicted vs. Actual Permits",
-       subtitle = "2022") +
-  geom_smooth(method = "lm", se = FALSE)
+
ggplot(ols_preds, aes(x = permits_count, y = ols_preds)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
-

+

Show the code -
mae <- paste0("MAE: ", round(mean(predictions$abs_error, na.rm = TRUE), 2))
+
ggplot(ols_preds, aes(x = abs_error)) +
+  geom_histogram() +
+  labs(title = "Distribution of Absolute Error per Block Group",
+       subtitle = "OLS, 2022")
+
+
+

+
+
+Show the code +
ols_mae <- paste0("MAE: ", round(mean(ols_preds$abs_error, na.rm = TRUE), 2))
 
-tm_shape(predictions) +
-        tm_polygons(col = "abs_error", border.alpha = 0, palette = 'viridis', style = "fisher", colorNA = "lightgrey") +
+ols_preds_map <- tm_shape(ols_preds) +
+        tm_polygons(col = "ols_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
   tm_shape(broad_and_market) +
   tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+ tm_layout(frame = FALSE) + +ols_error_map <- tm_shape(ols_preds) + + tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") + + tm_shape(broad_and_market) + + tm_lines(col = "lightgrey") + + tm_layout(frame = FALSE) + +tmap_arrange(ols_preds_map, ols_error_map)

-

+

We find that our OLS model has an MAE of only MAE: 2.66–not bad for such a simple model! Still, it struggles most in the areas where we most need it to succeed, so we will try to introduce better variables and apply a more complex model to improve our predictions.

-
-

4.2 Random Forest Regression

+
+

4.2 Random Forest Regression: Testing

+

We train and test up to 2022–we use this for model tuning and feature engineering.

Show the code -
rf <- randomForest(permits_count ~ ., 
-                   data = st_drop_geometry(permits_train),
-                   importance = TRUE, 
-                   na.action = na.omit)
-
-rf_predictions <- predict(rf, permits_test)
-rf_predictions <- cbind(permits_test, rf_predictions)
-rf_predictions <- rf_predictions %>%
-                  mutate(abs_error = abs(permits_count - rf_predictions),
-                         pct_error = abs_error / (permits_count + 0.0001))
-
-tm_shape(rf_predictions) +
-        tm_polygons(col = "rf_predictions", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+
test_preds_map <- tm_shape(rf_test_preds) +
+        tm_polygons(col = "rf_test_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+test_error_map <- tm_shape(rf_test_preds) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+tmap_arrange(test_preds_map, test_error_map)
-

+

Show the code -
ggplot(rf_predictions, aes(x = permits_count, y = rf_predictions)) +
-  geom_point() +
-  labs(title = "Predicted vs. Actual Permits",
-       subtitle = "2022") +
-  geom_smooth(method = "lm", se = FALSE)
+
ggplot(rf_test_preds, aes(x = abs_error)) +
+  geom_histogram() +
+  labs(title = "Distribution of Absolute Error per Block Group",
+       subtitle = "Random Forest, 2022")
-

+

Show the code -
rf_mae <- paste0("MAE: ", round(mean(rf_predictions$abs_error, na.rm = TRUE), 2))
-
-tm_shape(rf_predictions) +
-        tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+
ggplot(rf_test_preds, aes(x = permits_count, y = rf_test_preds)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2022") +
+  geom_smooth(method = "lm", se = FALSE)
-

+

+
+Show the code +
rf_test_mae <- paste0("MAE: ", round(mean(rf_test_preds$abs_error, na.rm = TRUE), 2))
+
+
+
+
+

4.3 Random Forest Regression: Validation

+

Having settled on our model features and tuning, we now validate on 2023 data.

+
+
+Show the code +
val_preds_map <- tm_shape(rf_val_preds) +
+        tm_polygons(col = "rf_val_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+val_error_map <- tm_shape(rf_val_preds) +
+        tm_polygons(col = "abs_error", border.alpha = 0, palette = mono_5_orange, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
+
+tmap_arrange(val_preds_map, val_error_map)
+
+
+

+
+
+Show the code +
ggplot(rf_val_preds, aes(x = abs_error)) +
+  geom_histogram() +
+  labs(title = "Distribution of Absolute Error per Block Group",
+       subtitle = "Random Forest, 2023")
+
+
+

+
+
+Show the code +
ggplot(rf_val_preds, aes(x = permits_count, y = rf_val_preds)) +
+  geom_point() +
+  labs(title = "Predicted vs. Actual Permits",
+       subtitle = "2023") +
+  geom_smooth(method = "lm", se = FALSE)
+
+
+

+
+
+Show the code +
rf_val_mae <- paste0("MAE: ", round(mean(rf_val_preds$abs_error, na.rm = TRUE), 2))
+
@@ -9635,23 +9687,16 @@

Show the code -
nbins <- as.integer(sqrt(nrow(rf_predictions)))
-vline <- mean(rf_predictions$abs_error, na.rm = TRUE)
-
-ggplot(rf_predictions, aes(x = abs_error)) +
-  geom_histogram(bins = nbins) +
-  geom_vline(aes(xintercept = vline))
+
nbins <- as.integer(sqrt(nrow(rf_val_preds)))
+vline <- mean(rf_val_preds$abs_error, na.rm = TRUE)
+
+ggplot(rf_val_preds, aes(x = abs_error)) +
+  geom_histogram(bins = nbins) +
+  geom_vline(aes(xintercept = vline))
-

+

-
-Show the code -
hmm <- permits_bg %>%
-  st_drop_geometry() %>%
-  group_by(year) %>%
-  summarize_all(.funs = list(~sum(is.na(.)))) # Check NA for all columns
-
@@ -9659,78 +9704,78 @@

Show the code -
rf_predictions <- rf_predictions %>%
-                      mutate(race_comp = case_when(
-                        percent_nonwhite >= .50 ~ "Majority Non-White",
-                        TRUE ~ "Majority White"
-                      ))
-
-ggplot(rf_predictions, aes(y = abs_error, color = race_comp)) +
-  geom_boxplot(fill = NA)
+
rf_val_preds <- rf_val_preds %>%
+                      mutate(race_comp = case_when(
+                        percent_nonwhite >= .50 ~ "Majority Non-White",
+                        TRUE ~ "Majority White"
+                      ))
+
+ggplot(rf_val_preds, aes(y = abs_error, color = race_comp)) +
+  geom_boxplot(fill = NA)
-

+

We find that error is not related to affordability and actually trends downward with percent nonwhite. (This is probably because there is less total development happening there in majority-minority neighborhoods to begin with, so the magnitude of error is less, even though proportionally it might be more.) Error increases slightly with total pop. This makes sense–more people –> more development.

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = rent_burden)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = rent_burden)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = total_pop)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = total_pop)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

Show the code -
ggplot(rf_predictions, aes(y = abs_error, x = med_inc)) + # or whatever the variable is
-  geom_point() +
-  geom_smooth(method = "lm", se= FALSE) +
-  theme_minimal()
+
ggplot(rf_val_preds, aes(y = abs_error, x = med_inc)) + # or whatever the variable is
+  geom_point() +
+  geom_smooth(method = "lm", se= FALSE) +
+  theme_minimal()
-

+

How does this generalize across council districts? Don’t forget to refactor

Show the code -
suppressMessages(
-  ggplot(rf_predictions, aes(x = reorder(district, abs_error, FUN = mean), y = abs_error)) +
-    geom_boxplot(fill = NA) +
-    labs(title = "MAE by Council District",
-         y = "Mean Absolute Error",
-         x = "Council District") +
-    theme_minimal() +
-    theme()
-)
+
suppressMessages(
+  ggplot(rf_val_preds, aes(x = reorder(district, abs_error, FUN = mean), y = abs_error)) +
+    geom_boxplot(fill = NA) +
+    labs(title = "MAE by Council District",
+         y = "Mean Absolute Error",
+         x = "Council District") +
+    theme_minimal() +
+    theme()
+)
-

+

@@ -9741,17 +9786,17 @@

Show the code -
filtered_zoning <- zoning %>%
-                     filter(str_detect(CODE, "RS") | str_detect(CODE, "I"),
-                            CODE != "I2",
-                            !str_detect(CODE, "SP"))
-
-
-tm_shape(filtered_zoning) +
-        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
filtered_zoning <- zoning %>%
+                     filter(str_detect(CODE, "RS") | str_detect(CODE, "I"),
+                            CODE != "I2",
+                            !str_detect(CODE, "SP"))
+
+
+tm_shape(filtered_zoning) +
+        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)

@@ -9761,85 +9806,85 @@

Show the code -
filtered_zoning <- st_join(
-  filtered_zoning,
-  rf_predictions %>% select(rf_predictions)
-)
-
-tm_shape(filtered_zoning) +
-        tm_polygons(col = "rf_predictions", border.alpha = 0, colorNA = "lightgrey", palette = mono_5_orange, style = "fisher") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
filtered_zoning <- st_join(
+  filtered_zoning,
+  rf_val_preds %>% select(rf_val_preds)
+)
+
+tm_shape(filtered_zoning) +
+        tm_polygons(col = "rf_val_preds", border.alpha = 0, colorNA = "lightgrey", palette = mono_5_orange, style = "fisher") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
-

+

Show the code -
tmap_mode('view')
-
-filtered_zoning %>%
-  filter(rf_predictions > 10) %>%
-tm_shape() +
-        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey",
-                    popup.vars = c('rf_predictions', 'CODE')) +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
tmap_mode('view')
+
+filtered_zoning %>%
+  filter(rf_val_preds > 10) %>%
+tm_shape() +
+        tm_polygons(col = "CODE", border.alpha = 0, colorNA = "lightgrey",
+                    popup.vars = c('rf_val_preds', 'CODE')) +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
-
- +
+

Furthermore, we can identify properties with high potential for assemblage, which suggests the ability to accomodate high-density, multi-unit housing.

Show the code -
nbs <- filtered_zoning %>% 
-  mutate(nb = st_contiguity(geometry))
-
-# Create edge list while handling cases with no neighbors
-edge_list <- tibble::tibble(id = 1:length(nbs$nb), nbs = nbs$nb) %>% 
-  tidyr::unnest(nbs) %>% 
-  filter(nbs != 0)
-
-# Create a graph with a node for each row in filtered_zoning
-g <- make_empty_graph(n = nrow(filtered_zoning))
-V(g)$name <- as.character(1:nrow(filtered_zoning))
-
-# Add edges if they exist
-if (nrow(edge_list) > 0) {
-  edges <- as.matrix(edge_list)
-  g <- add_edges(g, c(t(edges)))
-}
-
-# Calculate the number of contiguous neighbors, handling nodes without neighbors
-n_contiguous <- sapply(V(g)$name, function(node) {
-  if (node %in% edges) {
-    length(neighborhood(g, order = 1, nodes = as.numeric(node))[[1]])
-  } else {
-    1  # Nodes without neighbors count as 1 (themselves)
-  }
-})
-
-filtered_zoning <- filtered_zoning %>%
-                    mutate(n_contig = n_contiguous)
-
-filtered_zoning %>%
-  st_drop_geometry() %>%
-  select(rf_predictions,
-         n_contig,
-         OBJECTID,
-         CODE) %>%
-  filter(rf_predictions > 10,
-         n_contig > 2) %>%
-  arrange(desc(rf_predictions)) %>%
-  kablerize(caption = "Poorly-Zoned Properties with High Development Risk")
+
nbs <- filtered_zoning %>% 
+  mutate(nb = st_contiguity(geometry))
+
+# Create edge list while handling cases with no neighbors
+edge_list <- tibble::tibble(id = 1:length(nbs$nb), nbs = nbs$nb) %>% 
+  tidyr::unnest(nbs) %>% 
+  filter(nbs != 0)
+
+# Create a graph with a node for each row in filtered_zoning
+g <- make_empty_graph(n = nrow(filtered_zoning))
+V(g)$name <- as.character(1:nrow(filtered_zoning))
+
+# Add edges if they exist
+if (nrow(edge_list) > 0) {
+  edges <- as.matrix(edge_list)
+  g <- add_edges(g, c(t(edges)))
+}
+
+# Calculate the number of contiguous neighbors, handling nodes without neighbors
+n_contiguous <- sapply(V(g)$name, function(node) {
+  if (node %in% edges) {
+    length(neighborhood(g, order = 1, nodes = as.numeric(node))[[1]])
+  } else {
+    1  # Nodes without neighbors count as 1 (themselves)
+  }
+})
+
+filtered_zoning <- filtered_zoning %>%
+                    mutate(n_contig = n_contiguous)
+
+filtered_zoning %>%
+  st_drop_geometry() %>%
+  select(rf_val_preds,
+         n_contig,
+         OBJECTID,
+         CODE) %>%
+  filter(rf_val_preds > 10,
+         n_contig > 2) %>%
+  arrange(desc(rf_val_preds)) %>%
+  kablerize(caption = "Poorly-Zoned Properties with High Development Risk")
@@ -9847,7 +9892,7 @@

- + @@ -9855,368 +9900,270 @@

- - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - + + + + + + + + - + - + - + - - - + + + - + - + - + - + - + - - - - - - - - - + + - + - - - - - - - - - - - - - - - - + + - + - - + + - - - - - - - - + - - - - - - - - - - - - - - - - + + - + - - - - - - - - - + + - + - - + + - + - - + + - + - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - + - + - + - - + + - - + + - - - - - - - - - + + - + - - - - - - - - - + + - + - - + + - - + + - - - - - + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - + + + + + - - - - - - - - + - + - + - + - + - + - + - + - + - - - - - - - - - + + - - + + - - - - - + + + + +
rf_predictionsrf_val_preds n_contig OBJECTID CODE
751744.2962086827.06613 316717RSA51615ICMX
495741.61663154827.06613 310410ICMX2736IRMX
495841.61663158727.06613 310411RSA52804IRMX
495941.61663342027.06613 310412ICMX6405RSA5
524541.61663466727.06613 3111609661 RSA5
916927.06613420073ICMX
176834.2469022.24860 3 3128 IRMX
364034.2469022.24860 3 6901 ICMX
446029.67577
751721.08143 3909316717 RSA5
393428.7265720.84390 3 7646 ICMX
1232628.7265720.84390 4 25776 RSA5
1357824.43513327869IRMX
86823.84580495720.67827 3161510410 ICMX
154823.8458032736IRMX
158723.8458032804IRMX
342023.84580495820.67827 3640510411 RSA5
466723.84580495920.67827 39661RSA5
916923.8458042007310412 ICMX
508819.26747310759IRMX
772618.15027317168ICMX
783318.11450524520.67827 31740811160 RSA5
664516.99250314648ICMX
728016.99250446017.31087 3161799093 RSA5
991216.99250772615.16207 32152717168 ICMX
395715.647831357815.12210 3770427869 IRMX
596414.50423312931ICMX
639614.50423313980RSA3
654014.50423314372RSA5
655014.50423508815.00973 314401RSA5
213813.645004374410759 IRMX
451213.2790314.95163 5 9243 IRMX
601413.2790314.95163 6 13057 ICMX
577613.26503304112.82333 312473RSA55568ICMX
825213.26503318254RSA3
1284013.26503984212.82333 32662721369 RSA5
620012.71553413532ICMX
669112.43360984312.82333 31474721370 ICMX
414612.33040984512.82333 38265IRMX21372RSA5
510812.33040410795IRMX783312.25843317408RSA5
431812.08010395711.49060 38705RSD37704IRMX
1327012.08010664511.22550 327311RSD114648ICMX
13750.112.08010728011.22550 328226RSA316179RSA5
1334010.30770991211.22550 327419RSD321527ICMX
1403310.30770328807RSD3213810.8825343744IRMX
1403410.30770328808RSD1
814310.2576010.71940 3 18031 RSD3
865610.2576010.71940 3 19076 RSA3
940910.2576010.71940 4 20534 RSA2
1017510.2576010.71940 3 22002 RSD1
1260510.2576010.71940 3 26247 RSD1
570610.09430312329ICMX
300710.07567414610.39053 35506RSA58265IRMX
455810.0756739370ICMX510810.39053410795IRMX
@@ -10225,49 +10172,45 @@

Show the code -
tmap_mode('view')
-
-filtered_zoning %>%
-  select(rf_predictions,
-         n_contig,
-         OBJECTID,
-         CODE) %>%
-  filter(rf_predictions > 10,
-         n_contig > 2) %>%
-tm_shape() +
-        tm_polygons(col = "rf_predictions", border.alpha = 0, colorNA = "lightgrey", palette = "viridis", style = "fisher",
-                    popup.vars = c('rf_predictions', 'n_contig', 'CODE'), alpha = 0.5) +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE)
+
tmap_mode('view')
+
+filtered_zoning %>%
+  select(rf_val_preds,
+         n_contig,
+         OBJECTID,
+         CODE) %>%
+  filter(rf_val_preds > 10,
+         n_contig > 2) %>%
+tm_shape() +
+        tm_polygons(col = "rf_val_preds", border.alpha = 0, colorNA = "lightgrey", palette = "viridis", style = "fisher",
+                    popup.vars = c('rf_val_preds', 'n_contig', 'CODE'), alpha = 0.5) +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE)
-
- +
+

-
-

5.4 2024 Predictions

-

Just for shits and giggles, throw in 2024 predictions. (Can use data from 2023.)

+
+

5.4 Random Forest Regression: Projection

+

Just for shits and giggles, we can now predict for 2024.

Show the code -
rf_predictions_2024 <- predict(rf, permits_predict)
-rf_predictions_2024 <- cbind(permits_predict, rf_predictions_2024)
-
-
-tm_shape(rf_predictions_2024) +
-        tm_polygons(col = "rf_predictions_2024", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
-  tm_shape(broad_and_market) +
-  tm_lines(col = "lightgrey") +
-  tm_layout(frame = FALSE) 
+
tmap_mode('plot')
+
+tm_shape(rf_proj_preds) +
+        tm_polygons(col = "rf_proj_preds", border.alpha = 0, palette = mono_5_green, style = "fisher", colorNA = "lightgrey") +
+  tm_shape(broad_and_market) +
+  tm_lines(col = "lightgrey") +
+  tm_layout(frame = FALSE) 
- -
- +