Skip to content

Commit

Permalink
Merge pull request #27 from EvolEcolGroup/cran_sub
Browse files Browse the repository at this point in the history
Cran 0.9.3
  • Loading branch information
dramanica authored Jan 18, 2024
2 parents fbc7095 + d7a9c5f commit b367aeb
Show file tree
Hide file tree
Showing 39 changed files with 606 additions and 71 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tidysdm
Title: Species Distribution Models with Tidymodels
Version: 0.9.2
Version: 0.9.3
Authors@R: c(
person("Michela", "Leonardi", role = "aut"),
person("Margherita", "Colucci", role = "aut"),
Expand Down Expand Up @@ -48,14 +48,14 @@ Imports:
yardstick
Suggests:
blockCV,
data.table,
doParallel,
earth,
kernlab,
knitr,
overlapping,
pastclim (>= 2.0.0),
ranger,
readr,
rmarkdown,
spelling,
stacks,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ S3method(boyce_cont,data.frame)
S3method(boyce_cont,sf)
S3method(brier_class,sf)
S3method(classification_cost,sf)
S3method(collect_metrics,repeat_ensemble)
S3method(collect_metrics,simple_ensemble)
S3method(explain_tidysdm,default)
S3method(explain_tidysdm,repeat_ensemble)
S3method(explain_tidysdm,simple_ensemble)
Expand Down Expand Up @@ -106,3 +108,4 @@ importFrom(ggplot2,autoplot)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(tune,collect_metrics)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# `tidysdm` 0.9.3

* fix bug in `filter_high_cor` due to changes in `terra` 1.6.75
* implement `collect_metrics` for ensembles.

# `tidysdm` 0.9.2

* Release on CRAN
Expand Down
8 changes: 5 additions & 3 deletions R/autoplot_simple_ensemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
#' the results. If none is given, the first metric in the metric set is used
#' (after filtering by the `metric` option).
#' @param metric A character vector for which metrics (apart from `rank_metric`)
#' to be included in the visualization.
#' to be included in the visualization. If NULL (the default), all available
#' metrics will be plotted
#' @param std_errs The number of standard errors to plot (if the standard error
#' exists).
#' @param ... Other options to pass to `autoplot()`. Currently unused.
Expand All @@ -28,11 +29,12 @@
#' (95% confidence, by default).
#' @returns A ggplot object.
#' @examples
#' # we use the two_class_example from `workflowsets`
#' \donttest{
#' #' # we use the two_class_example from `workflowsets`
#' two_class_ens <- simple_ensemble() %>%
#' add_member(two_class_res, metric = "roc_auc")
#' autoplot(two_class_ens)
#'
#' }
#' @export
autoplot.simple_ensemble <- function(object, rank_metric = NULL, metric = NULL,
std_errs = stats::qnorm(0.95), ...) {
Expand Down
23 changes: 19 additions & 4 deletions R/blockcv2rsample.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
#' Convert an object created with `blockCV` to an `rsample` object
#'
#' This function creates objects created with `blockCV` to `rsample` objects
#' that can be used by `tidysdm`
#' that can be used by `tidysdm`. BlockCV provides more sophisticated sampling
#' options than the `spatialsample` library. For example, it is possible to
#' stratify the sampling to ensure that presences and absences are evenly
#' distributed among the folds (see the example below).
#'
#' Note that currently only objects of type `cv_spatial` and `cv_cluster` are
#' supported.
#'
#' @param x a object created with a `blockCV` function
#' @param data the `sf` object used to create `x`
#' @returns an `rsample` object
Expand All @@ -13,22 +20,30 @@
#' pa_data <- sf::st_as_sf(points, coords = c("x", "y"), crs = 7845)
#' sb1 <- cv_spatial(
#' x = pa_data,
#' column = "occ", # the response column (binary or multi-class)
#' column = "occ", # the response column to balance the folds
#' k = 5, # number of folds
#' size = 350000, # size of the blocks in metres
#' selection = "random", # random blocks-to-fold
#' iteration = 10
#' ) # find evenly dispersed folds
#' sb1_rsample <- blockcv2rsample(sb1, pa_data)
#' class(sb1_rsample)
#' autoplot(sb1_rsample)
#' }
blockcv2rsample <- function(x, data) {
if(!(any(inherits(x,"cv_spatial"),
inherits(x,"cv_cluster")))){
stop("this function does support this object type\n",
"only objects of class cv_spatial or cv_cluster are supported.")
}
splits <- lapply(
x$folds_list,
function(this_fold) {
names(this_fold) <- c("analysis", "assessment")
rsample::make_splits(this_fold, data = data)
rsample::make_splits(this_fold, data = data, class = "spatial_rsplit")
}
)
rsample::manual_rset(splits, ids = paste0("Fold", seq(1:length(splits))))
rsample::new_rset(splits, ids = paste0("Fold", seq(1:length(splits))),
attrib = NULL, subclass = c("spatial_rset", "rset"))

}
47 changes: 40 additions & 7 deletions R/check_split_balance.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#' Check the balance of presences vs pseudoabsences among splits
#'
#' @param splits the data splits (an `rset` object), generated by a function such as
#' @param splits the data splits (an `rset` or `split` object), generated by a function such as
#' [spatialsample::spatial_block_cv()]
#' @param .col the column containing the presences
#' @returns a table of number of presences and pseudoabsences
#' @returns a tibble of the number of presences and pseudoabsences in the assessment
#' and analysis
#' set of each split (or training and testing in an initial split)
#' @export
#' @examples
#' lacerta_thin <- readRDS(system.file("extdata/lacerta_climate_sf.RDS",
Expand All @@ -18,10 +20,41 @@ check_splits_balance <- function(
.col <- rlang::enquo(.col) %>%
rlang::quo_get_expr() %>%
rlang::as_string()
balance_list <- lapply(splits$splits, function(x) {
table(rsample::training(x) %>%
sf::st_drop_geometry() %>% dplyr::pull(.col))
})
balance_df <- do.call("rbind", balance_list)

if (inherits(splits,"rset")){
if (!(.col %in% names(splits$splits[[1]]$data))){
stop(".col should be a column in the data used to generate the splits")
}
training_list <- lapply(splits$splits, function(x) {
table(rsample::training(x) %>%
sf::st_drop_geometry() %>% dplyr::pull(.col))
})
training_df <- do.call("rbind", training_list)
testing_list <- lapply(splits$splits, function(x) {
table(rsample::testing(x) %>%
sf::st_drop_geometry() %>% dplyr::pull(.col))
})
testing_df <- do.call("rbind", testing_list)
dimnames(testing_df)[[2]] <- paste0(dimnames(testing_df)[[2]],"_analysis")
dimnames(training_df)[[2]] <- paste0(dimnames(training_df)[[2]],"_assessment")

balance_df <- dplyr::bind_cols(training_df, testing_df)
} else if (inherits(splits,"rsplit")) {
if (!(.col %in% names(splits$data))){
stop(".col should be a column in the data used to generate the splits")
}
training_df <- table(rsample::training(splits) %>%
sf::st_drop_geometry() %>% dplyr::pull(.col))
testing_df <- table(rsample::testing(splits) %>%
sf::st_drop_geometry() %>% dplyr::pull(.col))
# coerce to a df with only one row
balance_df <- as.data.frame(matrix(c(training_df, testing_df),nrow=1))
names(balance_df) <- c(paste0(names(testing_df),"_test"),
paste0(names(training_df),"_train"))
balance_df <- tibble::as_tibble(balance_df)
} else {
stop ("splits should be either a spatial_rset or a spatial_rsplit")
}

return(balance_df)
}
35 changes: 35 additions & 0 deletions R/collect_metrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' Obtain and format results produced by tuning functions for ensemble objects
#'
#' Return a tibble of performance metrics for all models.
#'
#' @param x A [`simple_ensemble`] or [`repeat_ensemble`] object
#' @param ... Not currently used.
#' @return A tibble.
#' @details
#'
#' When applied to a ensemble, the metrics that are returned
#' do not contain the actual tuning parameter columns and values (unlike when
#' these collect functions are run on other objects). The reason is that ensembles
#' contain different types of models or models with different tuning
#' parameters.
#'
#' @seealso [tune::collect_metrics()]
#'
#' @examples
#' collect_metrics(lacerta_ensemble)
#' collect_metrics(lacerta_rep_ens)
#' @importFrom tune collect_metrics
#' @export

collect_metrics.simple_ensemble <- function(x, ...) {
dplyr::bind_rows(x$metrics)
}

#' @export
#' @rdname collect_metrics.simple_ensemble
collect_metrics.repeat_ensemble <- function(x, ...) {
metric_table <- dplyr::bind_rows(x$metrics, .id = "rep_id")
metric_table$rep_id <- x$rep_id[as.numeric(metric_table$rep_id)]
return(metric_table)
}

9 changes: 8 additions & 1 deletion R/datasets_docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,14 @@

#' A simple ensemble for the lacerta data
#'
#' Ensemble SDM for *Lacerta schreiberi*.
#' Ensemble SDM for *Lacerta schreiberi*, as generated in the vignette.
#'
#' @format A [`simple_ensemble`] object
"lacerta_ensemble"

#' A repeat ensemble for the lacerta data
#'
#' Ensemble SDM for *Lacerta schreiberi*, as generated in the vignette.
#'
#' @format A [`repeat_ensemble`] object
"lacerta_rep_ens"
22 changes: 22 additions & 0 deletions R/out_of_range_warning.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Warn if some times are outside the range of time steps from a raster
#'
#' This function helps making sure that, when we assign times to time_step
#' layers of a raster, we do not have values which are badly out of range
#' @param times the times of the locations
#' @param time_steps the time steps from the raster
#' @returns NULL return
#' @keywords internal

out_of_range_warning <- function(times, time_steps){
time_steps_ordered<-sort(time_steps)
range_minmax <- c(utils::head(time_steps_ordered,n=1)[1]-
(abs(utils::head(time_steps_ordered,n=2)[1]-utils::head(time_steps_ordered,n=2)[2])/2),
utils::tail(time_steps_ordered,n=1)[1] +
(abs(utils::tail(time_steps_ordered,n=2)[1]-utils::tail(time_steps_ordered,n=2)[2])/2))
if(any(times<range_minmax[1]) | any(times>range_minmax[2])) {
warning("Some dates are out of the range of the available time series.\n",
"They will be assigned to the most extreme time point available, but this\n",
"might not make sense. The potentially problematic dates are:\n",
times[which((times<range_minmax[1]) | (times>range_minmax[2]))])
}
}
9 changes: 9 additions & 0 deletions R/sample_pseudoabs.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,20 @@ sample_pseudoabs <- function(data, raster, n, coords = NULL,
coords <- check_coords_names(data, coords)
dist_min <- dist_max <- NULL
if (method[1] == "dist_disc") {
if (length(method)!=3){
stop("method 'dist_disc' should have two thresholds, e.g. c('dist_disc',10,20)")
}
dist_min <- as.numeric(method[2])
dist_max <- as.numeric(method[3])
} else if (method[1] == "dist_min") {
if (length(method)!=2){
stop("method 'dist_min' should have one threshold, e.g. c('dist_min',10)")
}
dist_min <- as.numeric(method[2])
} else if (method[1] == "dist_max") {
if (length(method)!=2){
stop("method 'dist_max' should have one threshold, e.g. c('dist_max',50)")
}
dist_max <- as.numeric(method[2])
} else if (!method[1] %in% "random") {
stop("method has to be one of 'random', 'dist_min', 'dist_max', or 'dist_disc'")
Expand Down
Loading

0 comments on commit b367aeb

Please sign in to comment.