diff --git a/.nojekyll b/.nojekyll
index 7565821..b083c9a 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-8cb23537
\ No newline at end of file
+3293c4dc
\ No newline at end of file
diff --git a/assets/permits_animation.gif b/assets/permits_animation.gif
index 0f53bd7..b2f6a46 100644
Binary files a/assets/permits_animation.gif and b/assets/permits_animation.gif differ
diff --git a/final.html b/final.html
index 99f12e1..3e4c12a 100644
--- a/final.html
+++ b/final.html
@@ -3302,20 +3302,18 @@
document.body.style.height = "100%";
document.documentElement.style.width = "100%";
document.documentElement.style.height = "100%";
- if (cel) {
- cel.style.position = "absolute";
- var pad = unpackPadding(sizing.padding);
- cel.style.top = pad.top + "px";
- cel.style.right = pad.right + "px";
- cel.style.bottom = pad.bottom + "px";
- cel.style.left = pad.left + "px";
- el.style.width = "100%";
- el.style.height = "100%";
- }
+ cel.style.position = "absolute";
+ var pad = unpackPadding(sizing.padding);
+ cel.style.top = pad.top + "px";
+ cel.style.right = pad.right + "px";
+ cel.style.bottom = pad.bottom + "px";
+ cel.style.left = pad.left + "px";
+ el.style.width = "100%";
+ el.style.height = "100%";
return {
- getWidth: function() { return cel.offsetWidth; },
- getHeight: function() { return cel.offsetHeight; }
+ getWidth: function() { return cel.getBoundingClientRect().width; },
+ getHeight: function() { return cel.getBoundingClientRect().height; }
};
} else {
@@ -3323,8 +3321,8 @@
el.style.height = px(sizing.height);
return {
- getWidth: function() { return el.offsetWidth; },
- getHeight: function() { return el.offsetHeight; }
+ getWidth: function() { return cel.getBoundingClientRect().width; },
+ getHeight: function() { return cel.getBoundingClientRect().height; }
};
}
}
@@ -3548,8 +3546,8 @@
elementData(el, "initialized", true);
if (bindingDef.initialize) {
- var result = bindingDef.initialize(el, el.offsetWidth,
- el.offsetHeight);
+ var rect = el.getBoundingClientRect();
+ var result = bindingDef.initialize(el, rect.width, rect.height);
elementData(el, "init_result", result);
}
}
@@ -3591,29 +3589,30 @@
forEach(matches, function(el) {
var sizeObj = initSizing(el, binding);
+ var getSize = function(el) {
+ if (sizeObj) {
+ return {w: sizeObj.getWidth(), h: sizeObj.getHeight()}
+ } else {
+ var rect = el.getBoundingClientRect();
+ return {w: rect.width, h: rect.height}
+ }
+ };
+
if (hasClass(el, "html-widget-static-bound"))
return;
el.className = el.className + " html-widget-static-bound";
var initResult;
if (binding.initialize) {
- initResult = binding.initialize(el,
- sizeObj ? sizeObj.getWidth() : el.offsetWidth,
- sizeObj ? sizeObj.getHeight() : el.offsetHeight
- );
+ var size = getSize(el);
+ initResult = binding.initialize(el, size.w, size.h);
elementData(el, "init_result", initResult);
}
if (binding.resize) {
- var lastSize = {
- w: sizeObj ? sizeObj.getWidth() : el.offsetWidth,
- h: sizeObj ? sizeObj.getHeight() : el.offsetHeight
- };
+ var lastSize = getSize(el);
var resizeHandler = function(e) {
- var size = {
- w: sizeObj ? sizeObj.getWidth() : el.offsetWidth,
- h: sizeObj ? sizeObj.getHeight() : el.offsetHeight
- };
+ var size = getSize(el);
if (size.w === 0 && size.h === 0)
return;
if (size.w === lastSize.w && size.h === lastSize.h)
@@ -3915,7 +3914,6 @@
return result;
}
})();
-
+
+
@@ -9415,23 +9409,28 @@
Show the code
- corr_vars <- c ("total_pop" ,
- "med_inc" ,
- "percent_nonwhite" ,
- "percent_renters" ,
- "rent_burden" ,
- "ext_rent_burden" )
-
- corr_dat <- permits_bg %>% select (all_of (corr_vars)) %>% select (where (is.numeric)) %>% st_drop_geometry () %>% unique () %>% na.omit ()
-
- corr <- round (cor (corr_dat), 2 )
- p.mat <- cor_pmat (corr_dat)
-
-ggcorrplot (corr, p.mat = p.mat, hc.order = TRUE ,
- type = "full" , insig = "blank" , lab = TRUE , colors = c (palette[2 ], "white" , palette[3 ]))
+ corr_vars <- c ("total_pop" ,
+ "med_inc" ,
+ "percent_nonwhite" ,
+ "percent_renters" ,
+ "rent_burden" ,
+ "ext_rent_burden" )
+
+ corr_dat <- permits_bg %>% select (all_of (corr_vars), permits_count) %>% select (where (is.numeric)) %>% st_drop_geometry () %>% unique () %>% na.omit ()
+
+ corr <- round (cor (corr_dat), 2 )
+ p.mat <- cor_pmat (corr_dat)
+
+ggcorrplot (corr, p.mat = p.mat, hc.order = TRUE ,
+ type = "full" , insig = "blank" , lab = TRUE , colors = c (palette[2 ], "white" , palette[3 ])) +
+ annotate (
+ geom = "rect" ,
+ xmin = .5 , xmax = 7.5 , ymin = 4.5 , ymax = 5.5 ,
+ fill = "transparent" , color = "red" , alpha = 0.5
+ )
-
+
@@ -9441,57 +9440,57 @@
Show the code
- lisa <- permits_bg %>%
- filter (year == 2023 ) %>%
- mutate (nb = st_contiguity (geometry),
- wt = st_weights (nb),
- permits_lag = st_lag (permits_count, nb, wt),
- moran = local_moran (permits_count, nb, wt)) %>%
- tidyr:: unnest (moran) %>%
- mutate (pysal = ifelse (p_folded_sim <= 0.1 , as.character (pysal), NA ),
- hotspot = case_when (
- pysal == "High-High" ~ "Signficant" ,
- TRUE ~ "Not Signficant"
- ))
-
-#
-# palette <- c("High-High" = "#B20016",
-# "Low-Low" = "#1C4769",
-# "Low-High" = "#24975E",
-# "High-Low" = "#EACA97")
-
- morans_i <- tm_shape (lisa) +
- tm_polygons (col = "ii" , border.alpha = 0 , style = "jenks" , palette = 'viridis' )
-
- p_value <- tm_shape (lisa) +
- tm_polygons (col = "p_ii" , border.alpha = 0 , style = "jenks" , palette = '-viridis' )
-
- sig_hotspots <- tm_shape (lisa) +
- tm_polygons (col = "hotspot" , border.alpha = 0 , style = "cat" , palette = 'viridis' , textNA = "Not a Hotspot" )
-
-tmap_arrange (morans_i, p_value, sig_hotspots, ncol = 3 )
+ lisa <- permits_bg %>%
+ filter (year == 2023 ) %>%
+ mutate (nb = st_contiguity (geometry),
+ wt = st_weights (nb),
+ permits_lag = st_lag (permits_count, nb, wt),
+ moran = local_moran (permits_count, nb, wt)) %>%
+ tidyr:: unnest (moran) %>%
+ mutate (pysal = ifelse (p_folded_sim <= 0.1 , as.character (pysal), NA ),
+ hotspot = case_when (
+ pysal == "High-High" ~ "Signficant" ,
+ TRUE ~ "Not Signficant"
+ ))
+
+#
+# palette <- c("High-High" = "#B20016",
+# "Low-Low" = "#1C4769",
+# "Low-High" = "#24975E",
+# "High-Low" = "#EACA97")
+
+ morans_i <- tm_shape (lisa) +
+ tm_polygons (col = "ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green)
+
+ p_value <- tm_shape (lisa) +
+ tm_polygons (col = "p_ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green)
+
+ sig_hotspots <- tm_shape (lisa) +
+ tm_polygons (col = "hotspot" , border.alpha = 0 , style = "cat" , palette = c (palette[2 ], palette[3 ]), textNA = "Not a Hotspot" )
+
+tmap_arrange (morans_i, p_value, sig_hotspots, ncol = 3 )
-
+
Emergeging hotspots
Show the code
-# stc <- as_spacetime(permits_bg,
-# .loc_col = "geoid10",
-# .time_col = "year")
-#
-# # conduct EHSA
-# ehsa <- emerging_hotspot_analysis(
-# x = stc,
-# .var = "permits_count",
-# k = 1,
-# nsim = 5
-# )
-#
-# count(ehsa, classification)
+# stc <- as_spacetime(permits_bg %>% select(permits_count, geoid10, year) %>% na.omit(),
+# .loc_col = "geoid10",
+# .time_col = "year")
+#
+# # conduct EHSA
+# ehsa <- emerging_hotspot_analysis(
+# x = stc,
+# .var = "permits_count",
+# k = 1,
+# nsim = 5
+# )
+#
+# count(ehsa, classification)
@@ -9500,21 +9499,21 @@
Show the code
- permits_bg_long <- permits_bg %>%
- filter (! year %in% c (2024 )) %>%
- st_drop_geometry () %>%
- pivot_longer (
- cols = c (starts_with ("lag" ), dist_to_2022),
- names_to = "Variable" ,
- values_to = "Value"
- )
-
-
-ggscatter (permits_bg_long, x = "permits_count" , y = "Value" , facet.by = "Variable" ,
- add = "reg.line" ,
- add.params = list (color = palette[3 ], fill = palette[5 ]),
- conf.int = TRUE
- ) + stat_cor (method = "pearson" , p.accuracy = 0.001 , r.accuracy = 0.01 )
+ permits_bg_long <- permits_bg %>%
+ filter (! year %in% c (2024 )) %>%
+ st_drop_geometry () %>%
+ pivot_longer (
+ cols = c (starts_with ("lag" ), dist_to_2022),
+ names_to = "Variable" ,
+ values_to = "Value"
+ )
+
+
+ggscatter (permits_bg_long, x = "permits_count" , y = "Value" , facet.by = "Variable" ,
+ add = "reg.line" ,
+ add.params = list (color = palette[3 ], fill = palette[5 ]),
+ conf.int = TRUE
+ ) + stat_cor (method = "pearson" , p.accuracy = 0.001 , r.accuracy = 0.01 )
@@ -9536,94 +9535,147 @@
Show the code
- permits_train <- filter (permits_bg %>% select (- c (mapname, geoid10)), year < 2022 )
- permits_test <- filter (permits_bg %>% select (- c (mapname, geoid10)), year == 2022 )
- permits_validate <- filter (permits_bg %>% select (- c (mapname, geoid10)), year == 2023 )
- permits_predict <- filter (permits_bg %>% select (- c (mapname, geoid10)), year == 2024 )
-
- reg <- lm (permits_count ~ ., data = st_drop_geometry (permits_train))
-
- predictions <- predict (reg, permits_test)
- predictions <- cbind (permits_test, predictions)
-
- predictions <- predictions %>%
- mutate (abs_error = abs (permits_count - predictions),
- pct_error = abs_error / permits_count)
-
-ggplot (predictions, aes (x = permits_count, y = predictions)) +
- geom_point () +
- labs (title = "Predicted vs. Actual Permits" ,
- subtitle = "2022" ) +
- geom_smooth (method = "lm" , se = FALSE )
+ggplot (ols_preds, aes (x = permits_count, y = ols_preds)) +
+ geom_point () +
+ labs (title = "Predicted vs. Actual Permits" ,
+ subtitle = "2022" ) +
+ geom_smooth (method = "lm" , se = FALSE )
-
+
Show the code
- mae <- paste0 ("MAE: " , round (mean (predictions$ abs_error, na.rm = TRUE ), 2 ))
+ggplot (ols_preds, aes (x = abs_error)) +
+ geom_histogram () +
+ labs (title = "Distribution of Absolute Error per Block Group" ,
+ subtitle = "OLS, 2022" )
+
+
+
+
+
+Show the code
+ ols_mae <- paste0 ("MAE: " , round (mean (ols_preds$ abs_error, na.rm = TRUE ), 2 ))
-tm_shape (predictions) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = 'viridis' , style = "fisher" , colorNA = "lightgrey" ) +
+ ols_preds_map <- tm_shape (ols_preds) +
+ tm_polygons (col = "ols_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
tm_shape (broad_and_market) +
tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ tm_layout (frame = FALSE )
+
+ ols_error_map <- tm_shape (ols_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+tmap_arrange (ols_preds_map, ols_error_map)
-
+
We find that our OLS model has an MAE of only MAE: 2.66–not bad for such a simple model! Still, it struggles most in the areas where we most need it to succeed, so we will try to introduce better variables and apply a more complex model to improve our predictions.
-
- Random Forest Regression
+
+ Random Forest Regression: Testing
+We train and test up to 2022–we use this for model tuning and feature engineering.
Show the code
- rf <- randomForest (permits_count ~ .,
- data = st_drop_geometry (permits_train),
- importance = TRUE ,
- na.action = na.omit)
-
- rf_predictions <- predict (rf, permits_test)
- rf_predictions <- cbind (permits_test, rf_predictions)
- rf_predictions <- rf_predictions %>%
- mutate (abs_error = abs (permits_count - rf_predictions),
- pct_error = abs_error / (permits_count + 0.0001 ))
-
-tm_shape (rf_predictions) +
- tm_polygons (col = "rf_predictions" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ test_preds_map <- tm_shape (rf_test_preds) +
+ tm_polygons (col = "rf_test_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+ test_error_map <- tm_shape (rf_test_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+tmap_arrange (test_preds_map, test_error_map)
-
+
Show the code
-ggplot (rf_predictions, aes (x = permits_count, y = rf_predictions)) +
- geom_point () +
- labs (title = "Predicted vs. Actual Permits" ,
- subtitle = "2022" ) +
- geom_smooth (method = "lm" , se = FALSE )
+ggplot (rf_test_preds, aes (x = abs_error)) +
+ geom_histogram () +
+ labs (title = "Distribution of Absolute Error per Block Group" ,
+ subtitle = "Random Forest, 2022" )
-
+
Show the code
- rf_mae <- paste0 ("MAE: " , round (mean (rf_predictions$ abs_error, na.rm = TRUE ), 2 ))
-
-tm_shape (rf_predictions) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ggplot (rf_test_preds, aes (x = permits_count, y = rf_test_preds)) +
+ geom_point () +
+ labs (title = "Predicted vs. Actual Permits" ,
+ subtitle = "2022" ) +
+ geom_smooth (method = "lm" , se = FALSE )
-
+
+
+Show the code
+ rf_test_mae <- paste0 ("MAE: " , round (mean (rf_test_preds$ abs_error, na.rm = TRUE ), 2 ))
+
+
+
+
+ Random Forest Regression: Validation
+Having settled on our model features and tuning, we now validate on 2023 data.
+
+
+Show the code
+ val_preds_map <- tm_shape (rf_val_preds) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+ val_error_map <- tm_shape (rf_val_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+tmap_arrange (val_preds_map, val_error_map)
+
+
+
+
+
+Show the code
+ggplot (rf_val_preds, aes (x = abs_error)) +
+ geom_histogram () +
+ labs (title = "Distribution of Absolute Error per Block Group" ,
+ subtitle = "Random Forest, 2023" )
+
+
+
+
+
+Show the code
+ggplot (rf_val_preds, aes (x = permits_count, y = rf_val_preds)) +
+ geom_point () +
+ labs (title = "Predicted vs. Actual Permits" ,
+ subtitle = "2023" ) +
+ geom_smooth (method = "lm" , se = FALSE )
+
+
+
+
+
+Show the code
+ rf_val_mae <- paste0 ("MAE: " , round (mean (rf_val_preds$ abs_error, na.rm = TRUE ), 2 ))
+
@@ -9635,23 +9687,16 @@
Show the code
- nbins <- as.integer (sqrt (nrow (rf_predictions)))
- vline <- mean (rf_predictions$ abs_error, na.rm = TRUE )
-
-ggplot (rf_predictions, aes (x = abs_error)) +
- geom_histogram (bins = nbins) +
- geom_vline (aes (xintercept = vline))
+ nbins <- as.integer (sqrt (nrow (rf_val_preds)))
+ vline <- mean (rf_val_preds$ abs_error, na.rm = TRUE )
+
+ggplot (rf_val_preds, aes (x = abs_error)) +
+ geom_histogram (bins = nbins) +
+ geom_vline (aes (xintercept = vline))
-
+
-
-Show the code
- hmm <- permits_bg %>%
- st_drop_geometry () %>%
- group_by (year) %>%
- summarize_all (.funs = list (~ sum (is.na (.)))) # Check NA for all columns
-
@@ -9659,78 +9704,78 @@
Show the code
- rf_predictions <- rf_predictions %>%
- mutate (race_comp = case_when (
- percent_nonwhite >= .50 ~ "Majority Non-White" ,
- TRUE ~ "Majority White"
- ))
-
-ggplot (rf_predictions, aes (y = abs_error, color = race_comp)) +
- geom_boxplot (fill = NA )
+ rf_val_preds <- rf_val_preds %>%
+ mutate (race_comp = case_when (
+ percent_nonwhite >= .50 ~ "Majority Non-White" ,
+ TRUE ~ "Majority White"
+ ))
+
+ggplot (rf_val_preds, aes (y = abs_error, color = race_comp)) +
+ geom_boxplot (fill = NA )
-
+
We find that error is not related to affordability and actually trends downward with percent nonwhite. (This is probably because there is less total development happening there in majority-minority neighborhoods to begin with, so the magnitude of error is less, even though proportionally it might be more.) Error increases slightly with total pop. This makes sense–more people –> more development.
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = rent_burden)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = rent_burden)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = total_pop)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = total_pop)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = med_inc)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = med_inc)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
How does this generalize across council districts? Don’t forget to refactor
Show the code
-suppressMessages (
- ggplot (rf_predictions, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
- geom_boxplot (fill = NA ) +
- labs (title = "MAE by Council District" ,
- y = "Mean Absolute Error" ,
- x = "Council District" ) +
- theme_minimal () +
- theme ()
- )
+suppressMessages (
+ ggplot (rf_val_preds, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
+ geom_boxplot (fill = NA ) +
+ labs (title = "MAE by Council District" ,
+ y = "Mean Absolute Error" ,
+ x = "Council District" ) +
+ theme_minimal () +
+ theme ()
+ )
-
+
@@ -9741,17 +9786,17 @@
Show the code
- filtered_zoning <- zoning %>%
- filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
- CODE != "I2" ,
- ! str_detect (CODE, "SP" ))
-
-
-tm_shape (filtered_zoning) +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ filtered_zoning <- zoning %>%
+ filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
+ CODE != "I2" ,
+ ! str_detect (CODE, "SP" ))
+
+
+tm_shape (filtered_zoning) +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
@@ -9761,85 +9806,85 @@
Show the code
- filtered_zoning <- st_join (
- filtered_zoning,
- rf_predictions %>% select (rf_predictions)
- )
-
-tm_shape (filtered_zoning) +
- tm_polygons (col = "rf_predictions" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ filtered_zoning <- st_join (
+ filtered_zoning,
+ rf_val_preds %>% select (rf_val_preds)
+ )
+
+tm_shape (filtered_zoning) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
-
+
Show the code
-tmap_mode ('view' )
-
- filtered_zoning %>%
- filter (rf_predictions > 10 ) %>%
-tm_shape () +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
- popup.vars = c ('rf_predictions' , 'CODE' )) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('view' )
+
+ filtered_zoning %>%
+ filter (rf_val_preds > 10 ) %>%
+tm_shape () +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
+ popup.vars = c ('rf_val_preds' , 'CODE' )) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
Furthermore, we can identify properties with high potential for assemblage, which suggests the ability to accomodate high-density, multi-unit housing.
Show the code
- nbs <- filtered_zoning %>%
- mutate (nb = st_contiguity (geometry))
-
-# Create edge list while handling cases with no neighbors
- edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
- tidyr:: unnest (nbs) %>%
- filter (nbs != 0 )
-
-# Create a graph with a node for each row in filtered_zoning
- g <- make_empty_graph (n = nrow (filtered_zoning))
-V (g)$ name <- as.character (1 : nrow (filtered_zoning))
-
-# Add edges if they exist
-if (nrow (edge_list) > 0 ) {
- edges <- as.matrix (edge_list)
- g <- add_edges (g, c (t (edges)))
- }
-
-# Calculate the number of contiguous neighbors, handling nodes without neighbors
- n_contiguous <- sapply (V (g)$ name, function (node) {
- if (node %in% edges) {
- length (neighborhood (g, order = 1 , nodes = as.numeric (node))[[1 ]])
- } else {
- 1 # Nodes without neighbors count as 1 (themselves)
- }
- })
-
- filtered_zoning <- filtered_zoning %>%
- mutate (n_contig = n_contiguous)
-
- filtered_zoning %>%
- st_drop_geometry () %>%
- select (rf_predictions,
- n_contig,
- OBJECTID,
- CODE) %>%
- filter (rf_predictions > 10 ,
- n_contig > 2 ) %>%
- arrange (desc (rf_predictions)) %>%
- kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
+ nbs <- filtered_zoning %>%
+ mutate (nb = st_contiguity (geometry))
+
+# Create edge list while handling cases with no neighbors
+ edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
+ tidyr:: unnest (nbs) %>%
+ filter (nbs != 0 )
+
+# Create a graph with a node for each row in filtered_zoning
+ g <- make_empty_graph (n = nrow (filtered_zoning))
+V (g)$ name <- as.character (1 : nrow (filtered_zoning))
+
+# Add edges if they exist
+if (nrow (edge_list) > 0 ) {
+ edges <- as.matrix (edge_list)
+ g <- add_edges (g, c (t (edges)))
+ }
+
+# Calculate the number of contiguous neighbors, handling nodes without neighbors
+ n_contiguous <- sapply (V (g)$ name, function (node) {
+ if (node %in% edges) {
+ length (neighborhood (g, order = 1 , nodes = as.numeric (node))[[1 ]])
+ } else {
+ 1 # Nodes without neighbors count as 1 (themselves)
+ }
+ })
+
+ filtered_zoning <- filtered_zoning %>%
+ mutate (n_contig = n_contiguous)
+
+ filtered_zoning %>%
+ st_drop_geometry () %>%
+ select (rf_val_preds,
+ n_contig,
+ OBJECTID,
+ CODE) %>%
+ filter (rf_val_preds > 10 ,
+ n_contig > 2 ) %>%
+ arrange (desc (rf_val_preds)) %>%
+ kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
@@ -10225,49 +10172,45 @@
Show the code
-tmap_mode ('view' )
-
- filtered_zoning %>%
- select (rf_predictions,
- n_contig,
- OBJECTID,
- CODE) %>%
- filter (rf_predictions > 10 ,
- n_contig > 2 ) %>%
-tm_shape () +
- tm_polygons (col = "rf_predictions" , border.alpha = 0 , colorNA = "lightgrey" , palette = "viridis" , style = "fisher" ,
- popup.vars = c ('rf_predictions' , 'n_contig' , 'CODE' ), alpha = 0.5 ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('view' )
+
+ filtered_zoning %>%
+ select (rf_val_preds,
+ n_contig,
+ OBJECTID,
+ CODE) %>%
+ filter (rf_val_preds > 10 ,
+ n_contig > 2 ) %>%
+tm_shape () +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = "viridis" , style = "fisher" ,
+ popup.vars = c ('rf_val_preds' , 'n_contig' , 'CODE' ), alpha = 0.5 ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
-
- 2024 Predictions
-Just for shits and giggles, throw in 2024 predictions. (Can use data from 2023.)
+
+ Random Forest Regression: Projection
+Just for shits and giggles, we can now predict for 2024.
Show the code
- rf_predictions_2024 <- predict (rf, permits_predict)
- rf_predictions_2024 <- cbind (permits_predict, rf_predictions_2024)
-
-
-tm_shape (rf_predictions_2024) +
- tm_polygons (col = "rf_predictions_2024" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('plot' )
+
+tm_shape (rf_proj_preds) +
+ tm_polygons (col = "rf_proj_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
-
-
-
+
diff --git a/index.html b/index.html
index 99f12e1..3e4c12a 100644
--- a/index.html
+++ b/index.html
@@ -3302,20 +3302,18 @@
document.body.style.height = "100%";
document.documentElement.style.width = "100%";
document.documentElement.style.height = "100%";
- if (cel) {
- cel.style.position = "absolute";
- var pad = unpackPadding(sizing.padding);
- cel.style.top = pad.top + "px";
- cel.style.right = pad.right + "px";
- cel.style.bottom = pad.bottom + "px";
- cel.style.left = pad.left + "px";
- el.style.width = "100%";
- el.style.height = "100%";
- }
+ cel.style.position = "absolute";
+ var pad = unpackPadding(sizing.padding);
+ cel.style.top = pad.top + "px";
+ cel.style.right = pad.right + "px";
+ cel.style.bottom = pad.bottom + "px";
+ cel.style.left = pad.left + "px";
+ el.style.width = "100%";
+ el.style.height = "100%";
return {
- getWidth: function() { return cel.offsetWidth; },
- getHeight: function() { return cel.offsetHeight; }
+ getWidth: function() { return cel.getBoundingClientRect().width; },
+ getHeight: function() { return cel.getBoundingClientRect().height; }
};
} else {
@@ -3323,8 +3321,8 @@
el.style.height = px(sizing.height);
return {
- getWidth: function() { return el.offsetWidth; },
- getHeight: function() { return el.offsetHeight; }
+ getWidth: function() { return cel.getBoundingClientRect().width; },
+ getHeight: function() { return cel.getBoundingClientRect().height; }
};
}
}
@@ -3548,8 +3546,8 @@
elementData(el, "initialized", true);
if (bindingDef.initialize) {
- var result = bindingDef.initialize(el, el.offsetWidth,
- el.offsetHeight);
+ var rect = el.getBoundingClientRect();
+ var result = bindingDef.initialize(el, rect.width, rect.height);
elementData(el, "init_result", result);
}
}
@@ -3591,29 +3589,30 @@
forEach(matches, function(el) {
var sizeObj = initSizing(el, binding);
+ var getSize = function(el) {
+ if (sizeObj) {
+ return {w: sizeObj.getWidth(), h: sizeObj.getHeight()}
+ } else {
+ var rect = el.getBoundingClientRect();
+ return {w: rect.width, h: rect.height}
+ }
+ };
+
if (hasClass(el, "html-widget-static-bound"))
return;
el.className = el.className + " html-widget-static-bound";
var initResult;
if (binding.initialize) {
- initResult = binding.initialize(el,
- sizeObj ? sizeObj.getWidth() : el.offsetWidth,
- sizeObj ? sizeObj.getHeight() : el.offsetHeight
- );
+ var size = getSize(el);
+ initResult = binding.initialize(el, size.w, size.h);
elementData(el, "init_result", initResult);
}
if (binding.resize) {
- var lastSize = {
- w: sizeObj ? sizeObj.getWidth() : el.offsetWidth,
- h: sizeObj ? sizeObj.getHeight() : el.offsetHeight
- };
+ var lastSize = getSize(el);
var resizeHandler = function(e) {
- var size = {
- w: sizeObj ? sizeObj.getWidth() : el.offsetWidth,
- h: sizeObj ? sizeObj.getHeight() : el.offsetHeight
- };
+ var size = getSize(el);
if (size.w === 0 && size.h === 0)
return;
if (size.w === lastSize.w && size.h === lastSize.h)
@@ -3915,7 +3914,6 @@
return result;
}
})();
-
+
+
@@ -9415,23 +9409,28 @@
Show the code
- corr_vars <- c ("total_pop" ,
- "med_inc" ,
- "percent_nonwhite" ,
- "percent_renters" ,
- "rent_burden" ,
- "ext_rent_burden" )
-
- corr_dat <- permits_bg %>% select (all_of (corr_vars)) %>% select (where (is.numeric)) %>% st_drop_geometry () %>% unique () %>% na.omit ()
-
- corr <- round (cor (corr_dat), 2 )
- p.mat <- cor_pmat (corr_dat)
-
-ggcorrplot (corr, p.mat = p.mat, hc.order = TRUE ,
- type = "full" , insig = "blank" , lab = TRUE , colors = c (palette[2 ], "white" , palette[3 ]))
+ corr_vars <- c ("total_pop" ,
+ "med_inc" ,
+ "percent_nonwhite" ,
+ "percent_renters" ,
+ "rent_burden" ,
+ "ext_rent_burden" )
+
+ corr_dat <- permits_bg %>% select (all_of (corr_vars), permits_count) %>% select (where (is.numeric)) %>% st_drop_geometry () %>% unique () %>% na.omit ()
+
+ corr <- round (cor (corr_dat), 2 )
+ p.mat <- cor_pmat (corr_dat)
+
+ggcorrplot (corr, p.mat = p.mat, hc.order = TRUE ,
+ type = "full" , insig = "blank" , lab = TRUE , colors = c (palette[2 ], "white" , palette[3 ])) +
+ annotate (
+ geom = "rect" ,
+ xmin = .5 , xmax = 7.5 , ymin = 4.5 , ymax = 5.5 ,
+ fill = "transparent" , color = "red" , alpha = 0.5
+ )
-
+
@@ -9441,57 +9440,57 @@
Show the code
- lisa <- permits_bg %>%
- filter (year == 2023 ) %>%
- mutate (nb = st_contiguity (geometry),
- wt = st_weights (nb),
- permits_lag = st_lag (permits_count, nb, wt),
- moran = local_moran (permits_count, nb, wt)) %>%
- tidyr:: unnest (moran) %>%
- mutate (pysal = ifelse (p_folded_sim <= 0.1 , as.character (pysal), NA ),
- hotspot = case_when (
- pysal == "High-High" ~ "Signficant" ,
- TRUE ~ "Not Signficant"
- ))
-
-#
-# palette <- c("High-High" = "#B20016",
-# "Low-Low" = "#1C4769",
-# "Low-High" = "#24975E",
-# "High-Low" = "#EACA97")
-
- morans_i <- tm_shape (lisa) +
- tm_polygons (col = "ii" , border.alpha = 0 , style = "jenks" , palette = 'viridis' )
-
- p_value <- tm_shape (lisa) +
- tm_polygons (col = "p_ii" , border.alpha = 0 , style = "jenks" , palette = '-viridis' )
-
- sig_hotspots <- tm_shape (lisa) +
- tm_polygons (col = "hotspot" , border.alpha = 0 , style = "cat" , palette = 'viridis' , textNA = "Not a Hotspot" )
-
-tmap_arrange (morans_i, p_value, sig_hotspots, ncol = 3 )
+ lisa <- permits_bg %>%
+ filter (year == 2023 ) %>%
+ mutate (nb = st_contiguity (geometry),
+ wt = st_weights (nb),
+ permits_lag = st_lag (permits_count, nb, wt),
+ moran = local_moran (permits_count, nb, wt)) %>%
+ tidyr:: unnest (moran) %>%
+ mutate (pysal = ifelse (p_folded_sim <= 0.1 , as.character (pysal), NA ),
+ hotspot = case_when (
+ pysal == "High-High" ~ "Signficant" ,
+ TRUE ~ "Not Signficant"
+ ))
+
+#
+# palette <- c("High-High" = "#B20016",
+# "Low-Low" = "#1C4769",
+# "Low-High" = "#24975E",
+# "High-Low" = "#EACA97")
+
+ morans_i <- tm_shape (lisa) +
+ tm_polygons (col = "ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green)
+
+ p_value <- tm_shape (lisa) +
+ tm_polygons (col = "p_ii" , border.alpha = 0 , style = "jenks" , palette = mono_5_green)
+
+ sig_hotspots <- tm_shape (lisa) +
+ tm_polygons (col = "hotspot" , border.alpha = 0 , style = "cat" , palette = c (palette[2 ], palette[3 ]), textNA = "Not a Hotspot" )
+
+tmap_arrange (morans_i, p_value, sig_hotspots, ncol = 3 )
-
+
Emergeging hotspots
Show the code
-# stc <- as_spacetime(permits_bg,
-# .loc_col = "geoid10",
-# .time_col = "year")
-#
-# # conduct EHSA
-# ehsa <- emerging_hotspot_analysis(
-# x = stc,
-# .var = "permits_count",
-# k = 1,
-# nsim = 5
-# )
-#
-# count(ehsa, classification)
+# stc <- as_spacetime(permits_bg %>% select(permits_count, geoid10, year) %>% na.omit(),
+# .loc_col = "geoid10",
+# .time_col = "year")
+#
+# # conduct EHSA
+# ehsa <- emerging_hotspot_analysis(
+# x = stc,
+# .var = "permits_count",
+# k = 1,
+# nsim = 5
+# )
+#
+# count(ehsa, classification)
@@ -9500,21 +9499,21 @@
Show the code
- permits_bg_long <- permits_bg %>%
- filter (! year %in% c (2024 )) %>%
- st_drop_geometry () %>%
- pivot_longer (
- cols = c (starts_with ("lag" ), dist_to_2022),
- names_to = "Variable" ,
- values_to = "Value"
- )
-
-
-ggscatter (permits_bg_long, x = "permits_count" , y = "Value" , facet.by = "Variable" ,
- add = "reg.line" ,
- add.params = list (color = palette[3 ], fill = palette[5 ]),
- conf.int = TRUE
- ) + stat_cor (method = "pearson" , p.accuracy = 0.001 , r.accuracy = 0.01 )
+ permits_bg_long <- permits_bg %>%
+ filter (! year %in% c (2024 )) %>%
+ st_drop_geometry () %>%
+ pivot_longer (
+ cols = c (starts_with ("lag" ), dist_to_2022),
+ names_to = "Variable" ,
+ values_to = "Value"
+ )
+
+
+ggscatter (permits_bg_long, x = "permits_count" , y = "Value" , facet.by = "Variable" ,
+ add = "reg.line" ,
+ add.params = list (color = palette[3 ], fill = palette[5 ]),
+ conf.int = TRUE
+ ) + stat_cor (method = "pearson" , p.accuracy = 0.001 , r.accuracy = 0.01 )
@@ -9536,94 +9535,147 @@
Show the code
- permits_train <- filter (permits_bg %>% select (- c (mapname, geoid10)), year < 2022 )
- permits_test <- filter (permits_bg %>% select (- c (mapname, geoid10)), year == 2022 )
- permits_validate <- filter (permits_bg %>% select (- c (mapname, geoid10)), year == 2023 )
- permits_predict <- filter (permits_bg %>% select (- c (mapname, geoid10)), year == 2024 )
-
- reg <- lm (permits_count ~ ., data = st_drop_geometry (permits_train))
-
- predictions <- predict (reg, permits_test)
- predictions <- cbind (permits_test, predictions)
-
- predictions <- predictions %>%
- mutate (abs_error = abs (permits_count - predictions),
- pct_error = abs_error / permits_count)
-
-ggplot (predictions, aes (x = permits_count, y = predictions)) +
- geom_point () +
- labs (title = "Predicted vs. Actual Permits" ,
- subtitle = "2022" ) +
- geom_smooth (method = "lm" , se = FALSE )
+ggplot (ols_preds, aes (x = permits_count, y = ols_preds)) +
+ geom_point () +
+ labs (title = "Predicted vs. Actual Permits" ,
+ subtitle = "2022" ) +
+ geom_smooth (method = "lm" , se = FALSE )
-
+
Show the code
- mae <- paste0 ("MAE: " , round (mean (predictions$ abs_error, na.rm = TRUE ), 2 ))
+ggplot (ols_preds, aes (x = abs_error)) +
+ geom_histogram () +
+ labs (title = "Distribution of Absolute Error per Block Group" ,
+ subtitle = "OLS, 2022" )
+
+
+
+
+
+Show the code
+ ols_mae <- paste0 ("MAE: " , round (mean (ols_preds$ abs_error, na.rm = TRUE ), 2 ))
-tm_shape (predictions) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = 'viridis' , style = "fisher" , colorNA = "lightgrey" ) +
+ ols_preds_map <- tm_shape (ols_preds) +
+ tm_polygons (col = "ols_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
tm_shape (broad_and_market) +
tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ tm_layout (frame = FALSE )
+
+ ols_error_map <- tm_shape (ols_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+tmap_arrange (ols_preds_map, ols_error_map)
-
+
We find that our OLS model has an MAE of only MAE: 2.66–not bad for such a simple model! Still, it struggles most in the areas where we most need it to succeed, so we will try to introduce better variables and apply a more complex model to improve our predictions.
-
- Random Forest Regression
+
+ Random Forest Regression: Testing
+We train and test up to 2022–we use this for model tuning and feature engineering.
Show the code
- rf <- randomForest (permits_count ~ .,
- data = st_drop_geometry (permits_train),
- importance = TRUE ,
- na.action = na.omit)
-
- rf_predictions <- predict (rf, permits_test)
- rf_predictions <- cbind (permits_test, rf_predictions)
- rf_predictions <- rf_predictions %>%
- mutate (abs_error = abs (permits_count - rf_predictions),
- pct_error = abs_error / (permits_count + 0.0001 ))
-
-tm_shape (rf_predictions) +
- tm_polygons (col = "rf_predictions" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ test_preds_map <- tm_shape (rf_test_preds) +
+ tm_polygons (col = "rf_test_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+ test_error_map <- tm_shape (rf_test_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+tmap_arrange (test_preds_map, test_error_map)
-
+
Show the code
-ggplot (rf_predictions, aes (x = permits_count, y = rf_predictions)) +
- geom_point () +
- labs (title = "Predicted vs. Actual Permits" ,
- subtitle = "2022" ) +
- geom_smooth (method = "lm" , se = FALSE )
+ggplot (rf_test_preds, aes (x = abs_error)) +
+ geom_histogram () +
+ labs (title = "Distribution of Absolute Error per Block Group" ,
+ subtitle = "Random Forest, 2022" )
-
+
Show the code
- rf_mae <- paste0 ("MAE: " , round (mean (rf_predictions$ abs_error, na.rm = TRUE ), 2 ))
-
-tm_shape (rf_predictions) +
- tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ggplot (rf_test_preds, aes (x = permits_count, y = rf_test_preds)) +
+ geom_point () +
+ labs (title = "Predicted vs. Actual Permits" ,
+ subtitle = "2022" ) +
+ geom_smooth (method = "lm" , se = FALSE )
-
+
+
+Show the code
+ rf_test_mae <- paste0 ("MAE: " , round (mean (rf_test_preds$ abs_error, na.rm = TRUE ), 2 ))
+
+
+
+
+ Random Forest Regression: Validation
+Having settled on our model features and tuning, we now validate on 2023 data.
+
+
+Show the code
+ val_preds_map <- tm_shape (rf_val_preds) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+ val_error_map <- tm_shape (rf_val_preds) +
+ tm_polygons (col = "abs_error" , border.alpha = 0 , palette = mono_5_orange, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
+
+tmap_arrange (val_preds_map, val_error_map)
+
+
+
+
+
+Show the code
+ggplot (rf_val_preds, aes (x = abs_error)) +
+ geom_histogram () +
+ labs (title = "Distribution of Absolute Error per Block Group" ,
+ subtitle = "Random Forest, 2023" )
+
+
+
+
+
+Show the code
+ggplot (rf_val_preds, aes (x = permits_count, y = rf_val_preds)) +
+ geom_point () +
+ labs (title = "Predicted vs. Actual Permits" ,
+ subtitle = "2023" ) +
+ geom_smooth (method = "lm" , se = FALSE )
+
+
+
+
+
+Show the code
+ rf_val_mae <- paste0 ("MAE: " , round (mean (rf_val_preds$ abs_error, na.rm = TRUE ), 2 ))
+
@@ -9635,23 +9687,16 @@
Show the code
- nbins <- as.integer (sqrt (nrow (rf_predictions)))
- vline <- mean (rf_predictions$ abs_error, na.rm = TRUE )
-
-ggplot (rf_predictions, aes (x = abs_error)) +
- geom_histogram (bins = nbins) +
- geom_vline (aes (xintercept = vline))
+ nbins <- as.integer (sqrt (nrow (rf_val_preds)))
+ vline <- mean (rf_val_preds$ abs_error, na.rm = TRUE )
+
+ggplot (rf_val_preds, aes (x = abs_error)) +
+ geom_histogram (bins = nbins) +
+ geom_vline (aes (xintercept = vline))
-
+
-
-Show the code
- hmm <- permits_bg %>%
- st_drop_geometry () %>%
- group_by (year) %>%
- summarize_all (.funs = list (~ sum (is.na (.)))) # Check NA for all columns
-
@@ -9659,78 +9704,78 @@
Show the code
- rf_predictions <- rf_predictions %>%
- mutate (race_comp = case_when (
- percent_nonwhite >= .50 ~ "Majority Non-White" ,
- TRUE ~ "Majority White"
- ))
-
-ggplot (rf_predictions, aes (y = abs_error, color = race_comp)) +
- geom_boxplot (fill = NA )
+ rf_val_preds <- rf_val_preds %>%
+ mutate (race_comp = case_when (
+ percent_nonwhite >= .50 ~ "Majority Non-White" ,
+ TRUE ~ "Majority White"
+ ))
+
+ggplot (rf_val_preds, aes (y = abs_error, color = race_comp)) +
+ geom_boxplot (fill = NA )
-
+
We find that error is not related to affordability and actually trends downward with percent nonwhite. (This is probably because there is less total development happening there in majority-minority neighborhoods to begin with, so the magnitude of error is less, even though proportionally it might be more.) Error increases slightly with total pop. This makes sense–more people –> more development.
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = rent_burden)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = rent_burden)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = percent_nonwhite)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = total_pop)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = total_pop)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
Show the code
-ggplot (rf_predictions, aes (y = abs_error, x = med_inc)) + # or whatever the variable is
- geom_point () +
- geom_smooth (method = "lm" , se= FALSE ) +
- theme_minimal ()
+ggplot (rf_val_preds, aes (y = abs_error, x = med_inc)) + # or whatever the variable is
+ geom_point () +
+ geom_smooth (method = "lm" , se= FALSE ) +
+ theme_minimal ()
-
+
How does this generalize across council districts? Don’t forget to refactor
Show the code
-suppressMessages (
- ggplot (rf_predictions, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
- geom_boxplot (fill = NA ) +
- labs (title = "MAE by Council District" ,
- y = "Mean Absolute Error" ,
- x = "Council District" ) +
- theme_minimal () +
- theme ()
- )
+suppressMessages (
+ ggplot (rf_val_preds, aes (x = reorder (district, abs_error, FUN = mean), y = abs_error)) +
+ geom_boxplot (fill = NA ) +
+ labs (title = "MAE by Council District" ,
+ y = "Mean Absolute Error" ,
+ x = "Council District" ) +
+ theme_minimal () +
+ theme ()
+ )
-
+
@@ -9741,17 +9786,17 @@
Show the code
- filtered_zoning <- zoning %>%
- filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
- CODE != "I2" ,
- ! str_detect (CODE, "SP" ))
-
-
-tm_shape (filtered_zoning) +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ filtered_zoning <- zoning %>%
+ filter (str_detect (CODE, "RS" ) | str_detect (CODE, "I" ),
+ CODE != "I2" ,
+ ! str_detect (CODE, "SP" ))
+
+
+tm_shape (filtered_zoning) +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
@@ -9761,85 +9806,85 @@
Show the code
- filtered_zoning <- st_join (
- filtered_zoning,
- rf_predictions %>% select (rf_predictions)
- )
-
-tm_shape (filtered_zoning) +
- tm_polygons (col = "rf_predictions" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+ filtered_zoning <- st_join (
+ filtered_zoning,
+ rf_val_preds %>% select (rf_val_preds)
+ )
+
+tm_shape (filtered_zoning) +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = mono_5_orange, style = "fisher" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
-
+
Show the code
-tmap_mode ('view' )
-
- filtered_zoning %>%
- filter (rf_predictions > 10 ) %>%
-tm_shape () +
- tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
- popup.vars = c ('rf_predictions' , 'CODE' )) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('view' )
+
+ filtered_zoning %>%
+ filter (rf_val_preds > 10 ) %>%
+tm_shape () +
+ tm_polygons (col = "CODE" , border.alpha = 0 , colorNA = "lightgrey" ,
+ popup.vars = c ('rf_val_preds' , 'CODE' )) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
Furthermore, we can identify properties with high potential for assemblage, which suggests the ability to accomodate high-density, multi-unit housing.
Show the code
- nbs <- filtered_zoning %>%
- mutate (nb = st_contiguity (geometry))
-
-# Create edge list while handling cases with no neighbors
- edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
- tidyr:: unnest (nbs) %>%
- filter (nbs != 0 )
-
-# Create a graph with a node for each row in filtered_zoning
- g <- make_empty_graph (n = nrow (filtered_zoning))
-V (g)$ name <- as.character (1 : nrow (filtered_zoning))
-
-# Add edges if they exist
-if (nrow (edge_list) > 0 ) {
- edges <- as.matrix (edge_list)
- g <- add_edges (g, c (t (edges)))
- }
-
-# Calculate the number of contiguous neighbors, handling nodes without neighbors
- n_contiguous <- sapply (V (g)$ name, function (node) {
- if (node %in% edges) {
- length (neighborhood (g, order = 1 , nodes = as.numeric (node))[[1 ]])
- } else {
- 1 # Nodes without neighbors count as 1 (themselves)
- }
- })
-
- filtered_zoning <- filtered_zoning %>%
- mutate (n_contig = n_contiguous)
-
- filtered_zoning %>%
- st_drop_geometry () %>%
- select (rf_predictions,
- n_contig,
- OBJECTID,
- CODE) %>%
- filter (rf_predictions > 10 ,
- n_contig > 2 ) %>%
- arrange (desc (rf_predictions)) %>%
- kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
+ nbs <- filtered_zoning %>%
+ mutate (nb = st_contiguity (geometry))
+
+# Create edge list while handling cases with no neighbors
+ edge_list <- tibble:: tibble (id = 1 : length (nbs$ nb), nbs = nbs$ nb) %>%
+ tidyr:: unnest (nbs) %>%
+ filter (nbs != 0 )
+
+# Create a graph with a node for each row in filtered_zoning
+ g <- make_empty_graph (n = nrow (filtered_zoning))
+V (g)$ name <- as.character (1 : nrow (filtered_zoning))
+
+# Add edges if they exist
+if (nrow (edge_list) > 0 ) {
+ edges <- as.matrix (edge_list)
+ g <- add_edges (g, c (t (edges)))
+ }
+
+# Calculate the number of contiguous neighbors, handling nodes without neighbors
+ n_contiguous <- sapply (V (g)$ name, function (node) {
+ if (node %in% edges) {
+ length (neighborhood (g, order = 1 , nodes = as.numeric (node))[[1 ]])
+ } else {
+ 1 # Nodes without neighbors count as 1 (themselves)
+ }
+ })
+
+ filtered_zoning <- filtered_zoning %>%
+ mutate (n_contig = n_contiguous)
+
+ filtered_zoning %>%
+ st_drop_geometry () %>%
+ select (rf_val_preds,
+ n_contig,
+ OBJECTID,
+ CODE) %>%
+ filter (rf_val_preds > 10 ,
+ n_contig > 2 ) %>%
+ arrange (desc (rf_val_preds)) %>%
+ kablerize (caption = "Poorly-Zoned Properties with High Development Risk" )
@@ -10225,49 +10172,45 @@
Show the code
-tmap_mode ('view' )
-
- filtered_zoning %>%
- select (rf_predictions,
- n_contig,
- OBJECTID,
- CODE) %>%
- filter (rf_predictions > 10 ,
- n_contig > 2 ) %>%
-tm_shape () +
- tm_polygons (col = "rf_predictions" , border.alpha = 0 , colorNA = "lightgrey" , palette = "viridis" , style = "fisher" ,
- popup.vars = c ('rf_predictions' , 'n_contig' , 'CODE' ), alpha = 0.5 ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('view' )
+
+ filtered_zoning %>%
+ select (rf_val_preds,
+ n_contig,
+ OBJECTID,
+ CODE) %>%
+ filter (rf_val_preds > 10 ,
+ n_contig > 2 ) %>%
+tm_shape () +
+ tm_polygons (col = "rf_val_preds" , border.alpha = 0 , colorNA = "lightgrey" , palette = "viridis" , style = "fisher" ,
+ popup.vars = c ('rf_val_preds' , 'n_contig' , 'CODE' ), alpha = 0.5 ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
-
- 2024 Predictions
-Just for shits and giggles, throw in 2024 predictions. (Can use data from 2023.)
+
+ Random Forest Regression: Projection
+Just for shits and giggles, we can now predict for 2024.
Show the code
- rf_predictions_2024 <- predict (rf, permits_predict)
- rf_predictions_2024 <- cbind (permits_predict, rf_predictions_2024)
-
-
-tm_shape (rf_predictions_2024) +
- tm_polygons (col = "rf_predictions_2024" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
- tm_shape (broad_and_market) +
- tm_lines (col = "lightgrey" ) +
- tm_layout (frame = FALSE )
+tmap_mode ('plot' )
+
+tm_shape (rf_proj_preds) +
+ tm_polygons (col = "rf_proj_preds" , border.alpha = 0 , palette = mono_5_green, style = "fisher" , colorNA = "lightgrey" ) +
+ tm_shape (broad_and_market) +
+ tm_lines (col = "lightgrey" ) +
+ tm_layout (frame = FALSE )
-
-
-
+