From e3f6a4265c438be5b3dd1d536611fde139f63257 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Thu, 31 Oct 2024 09:13:45 +0000 Subject: [PATCH 1/3] allow for geometry column with different names --- R/recipe_sf.R | 2 -- R/recipes_sf_methods.R | 39 ++++++++++++++++++++++++--------- inst/WORDLIST | 2 ++ tests/testthat/test_recipe_sf.R | 35 +++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/R/recipe_sf.R b/R/recipe_sf.R index f1da99a2..3a4aa7f6 100644 --- a/R/recipe_sf.R +++ b/R/recipe_sf.R @@ -41,8 +41,6 @@ recipe.sf <- function(x, ...) { sf::st_drop_geometry() } - - rec <- recipe(x, ...) %>% update_role(dplyr::any_of(c("X", "Y")), new_role = "coords") class(rec) <- c("spatial_recipe", class(rec)) diff --git a/R/recipes_sf_methods.R b/R/recipes_sf_methods.R index e67a1aee..7fb67d59 100644 --- a/R/recipes_sf_methods.R +++ b/R/recipes_sf_methods.R @@ -3,16 +3,26 @@ prep.spatial_recipe <- function(x, training = NULL, fresh = FALSE, verbose = FAL retain = TRUE, log_changes = FALSE, strings_as_factors = TRUE, ...) { if (!is.null(training)) { - # if we have a geometry - if ("geometry" %in% names(training)) { - ## convert_geometry_column + # sometimes we have a tibble with a geometry column but not cast into an sf object + # (this is the case when running a workflow) + + geom_col <- names(training)[unlist(lapply(training, inherits, "sfc"))] + if (length(geom_col) > 0) { + geom_col <- geom_col[1] training <- training %>% - dplyr::bind_cols(sf::st_coordinates(training$geometry)) %>% + dplyr::bind_cols(sf::st_coordinates(training[[geom_col]])) %>% sf::st_drop_geometry() + +# # if we have a geometry +# if ("geometry" %in% names(training)) { +# ## convert_geometry_column +# training <- training %>% +# dplyr::bind_cols(sf::st_coordinates(training$geometry)) %>% +# sf::st_drop_geometry() } # Add dummy X and Y if they are not already present if (!all(c("X", "Y") %in% names(training))) { - training <- training %>% dplyr::mutate(X = NA, Y = NA) + training <- training %>% dplyr::mutate(X = NA_real_, Y = NA_real_) } } NextMethod(generic="prep", @@ -26,18 +36,27 @@ prep.spatial_recipe <- function(x, training = NULL, fresh = FALSE, verbose = FAL bake.spatial_recipe <- function(object, new_data, ..., composition = "tibble") { ## convert_geometry_column if (!is.null(new_data)) { - if ("geometry" %in% names(new_data)) { - ## convert_geometry_column + # sometimes we have a tibble with a geometry column but not cast into an sf object + # (this is the case when running a workflow) + geom_col <- names(new_data)[unlist(lapply(new_data, inherits, "sfc"))] + if (length(geom_col) > 0) { + geom_col <- geom_col[1] new_data <- new_data %>% - dplyr::bind_cols(sf::st_coordinates(new_data$geometry)) %>% + dplyr::bind_cols(sf::st_coordinates(new_data[[geom_col]])) %>% sf::st_drop_geometry() + + # if ("geometry" %in% names(new_data)) { + # ## convert_geometry_column + # new_data <- new_data %>% + # dplyr::bind_cols(sf::st_coordinates(new_data$geometry)) %>% + # sf::st_drop_geometry() } # Add dummy X and Y if they are not already present if (!all(c("X", "Y") %in% names(new_data))) { - new_data <- new_data %>% dplyr::mutate(X = NA, Y = NA) + new_data <- new_data %>% dplyr::mutate(X = NA_real_, Y = NA_real_) } } - NextMethod(generic="bake", object = object, + NextMethod(generic="bake", object = object, new_data = sf::st_drop_geometry(new_data), ..., composition = composition) } diff --git a/inst/WORDLIST b/inst/WORDLIST index a7b0574a..83037d2c 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -33,6 +33,7 @@ SSP Schmiegelow Schoener Schoener's +SpatRaster StackOverflow TSS Tidymodels @@ -110,6 +111,7 @@ stackexchange stackoverflow stat's svm +terra thres tibble tibbles diff --git a/tests/testthat/test_recipe_sf.R b/tests/testthat/test_recipe_sf.R index 59455ebc..59ea3c22 100644 --- a/tests/testthat/test_recipe_sf.R +++ b/tests/testthat/test_recipe_sf.R @@ -44,3 +44,38 @@ test_that("sdm_recipe_sf", { # X should just be a dummy variable expect_true(all(is.na(baked_no_xy$X))) }) + +test_that("sdm_recipe_sf works with a geometry named differently", { + lacerta_thin <- readRDS(system.file("extdata/lacerta_climate_sf.RDS", + package = "tidysdm" + )) + sf::st_geometry(lacerta_thin) <- "geom" + lacerta_rec <- recipe(lacerta_thin, formula = class ~ .) + + lacerta_models <- + # create the workflow_set + workflow_set( + preproc = list(default = lacerta_rec), + models = list( + # the standard glm specs + glm = sdm_spec_glm(), + # rf specs with tuning + rf = sdm_spec_rf() + ), + # make all combinations of preproc and models, + cross = TRUE + ) %>% + # tweak controls to store information needed later to create the ensemble + option_add(control = control_ensemble_grid()) + set.seed(100) + lacerta_cv <- spatial_block_cv(data = lacerta_thin, v = 3, n = 5) + lacerta_models <- + lacerta_models %>% + workflow_map("tune_grid", + resamples = lacerta_cv, grid = 3, + metrics = sdm_metric_set(), verbose = FALSE + ) + res <- collect_notes(.Last.tune.result) %>% dplyr::distinct(type, note) + # expect no warnings + expect_true(nrow(res) == 0) +}) From 3bf6581264f98f60d2ac600ff5cc13ae772a894c Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Thu, 31 Oct 2024 09:20:00 +0000 Subject: [PATCH 2/3] get geometry name from sf --- R/recipes_sf_methods.R | 13 ++++++++++--- tests/testthat/test_recipe_sf.R | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R/recipes_sf_methods.R b/R/recipes_sf_methods.R index 7fb67d59..aca1d0df 100644 --- a/R/recipes_sf_methods.R +++ b/R/recipes_sf_methods.R @@ -5,8 +5,11 @@ prep.spatial_recipe <- function(x, training = NULL, fresh = FALSE, verbose = FAL if (!is.null(training)) { # sometimes we have a tibble with a geometry column but not cast into an sf object # (this is the case when running a workflow) - - geom_col <- names(training)[unlist(lapply(training, inherits, "sfc"))] + if (inherits(training,"sf")){ + geom_col <- attributes(training)$sf_column + } else { + geom_col <- names(training)[unlist(lapply(training, inherits, "sfc"))] + } if (length(geom_col) > 0) { geom_col <- geom_col[1] training <- training %>% @@ -38,7 +41,11 @@ bake.spatial_recipe <- function(object, new_data, ..., composition = "tibble") { if (!is.null(new_data)) { # sometimes we have a tibble with a geometry column but not cast into an sf object # (this is the case when running a workflow) - geom_col <- names(new_data)[unlist(lapply(new_data, inherits, "sfc"))] + if (inherits(new_data,"sf")){ + geom_col <- attributes(new_data)$sf_column + } else { + geom_col <- names(new_data)[unlist(lapply(new_data, inherits, "sfc"))] + } if (length(geom_col) > 0) { geom_col <- geom_col[1] new_data <- new_data %>% diff --git a/tests/testthat/test_recipe_sf.R b/tests/testthat/test_recipe_sf.R index 59ea3c22..fd7845f4 100644 --- a/tests/testthat/test_recipe_sf.R +++ b/tests/testthat/test_recipe_sf.R @@ -76,6 +76,6 @@ test_that("sdm_recipe_sf works with a geometry named differently", { metrics = sdm_metric_set(), verbose = FALSE ) res <- collect_notes(.Last.tune.result) %>% dplyr::distinct(type, note) - # expect no warnings + # expect no errors expect_true(nrow(res) == 0) }) From 9647d77c164d13cd3f834e8b452ce8fa45d4429d Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 1 Nov 2024 09:04:45 +0000 Subject: [PATCH 3/3] clean up old code --- R/recipes_sf_methods.R | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/R/recipes_sf_methods.R b/R/recipes_sf_methods.R index aca1d0df..35270069 100644 --- a/R/recipes_sf_methods.R +++ b/R/recipes_sf_methods.R @@ -15,13 +15,6 @@ prep.spatial_recipe <- function(x, training = NULL, fresh = FALSE, verbose = FAL training <- training %>% dplyr::bind_cols(sf::st_coordinates(training[[geom_col]])) %>% sf::st_drop_geometry() - -# # if we have a geometry -# if ("geometry" %in% names(training)) { -# ## convert_geometry_column -# training <- training %>% -# dplyr::bind_cols(sf::st_coordinates(training$geometry)) %>% -# sf::st_drop_geometry() } # Add dummy X and Y if they are not already present if (!all(c("X", "Y") %in% names(training))) { @@ -47,16 +40,11 @@ bake.spatial_recipe <- function(object, new_data, ..., composition = "tibble") { geom_col <- names(new_data)[unlist(lapply(new_data, inherits, "sfc"))] } if (length(geom_col) > 0) { + # if we have multiple geometry column, we use the first one geom_col <- geom_col[1] new_data <- new_data %>% dplyr::bind_cols(sf::st_coordinates(new_data[[geom_col]])) %>% sf::st_drop_geometry() - - # if ("geometry" %in% names(new_data)) { - # ## convert_geometry_column - # new_data <- new_data %>% - # dplyr::bind_cols(sf::st_coordinates(new_data$geometry)) %>% - # sf::st_drop_geometry() } # Add dummy X and Y if they are not already present if (!all(c("X", "Y") %in% names(new_data))) {