From 6702cacd1de0bfdeb407a5fb49b86f58ff27d3df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Fri, 8 Sep 2023 15:28:25 -0400 Subject: [PATCH 01/10] be explicit about the possible methods and exact since there is some fuzzy matching for the method names --- scripts/SDM/selectBackgroundFunc.R | 430 ++++++++++++++--------------- 1 file changed, 215 insertions(+), 215 deletions(-) diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R index eea36c95..875d3b65 100644 --- a/scripts/SDM/selectBackgroundFunc.R +++ b/scripts/SDM/selectBackgroundFunc.R @@ -1,215 +1,215 @@ - - -#' @title Create background points -#' -#' @name create_background -#' @param predictors spat raster, containing the predictor variables -#' @param mask, spat vector, mask to apply to the predictors. -#' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted) -#' @param n integer, number of background points to select. -#' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer" -#' @param width_buffer int, buffer width around observations. Used with method == "inclusion_buffer" -#' @param species string, species name. -#' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods -#' @return spatial points -#' @export - -create_background <- function( - predictors, - mask = NULL, - method = "random", - n = 10000, - obs = NULL, - density_bias = NULL, - width_buffer = NULL, - raster = NULL) { - - proj <- terra::crs(predictors, proj = T) - - ## New method: If we use raster, we re-project our raster and add it as an additional layer - if (grepl("raster", method)) { - if (is.null(raster)) stop(paste("No raster included with method:", method)) - - # Bilinear interpolation when projecting in this manner - # Exclude 0s, since we don't want to sample areas with no sightings - raster[raster <= 0] <- NA - - add(predictors) <- raster - - } - - if (!is.null(mask)) { - predictors <- fast_crop(predictors, mask) - } - - sum_na_layer <- terra::tapp(predictors, index = rep(1, terra::nlyr(predictors)), fun = sum, na.rm = F) - - ncellr <- terra::global(!is.na(sum_na_layer), sum) - - if (ncellr < n) { - message( - "Number of background-points exceeds number of cell. ", - ncellr, - " background-points will be sampled" - ) - backgr <- terra::as.data.frame(predictors, na.rm = TRUE, xy = TRUE) |> dplyr::select(x, y) - - } else { - - if (any(method == "random" | grepl("^unweighted", method))) { - - # all the cells have the same probability to be selected - message(sprintf("Selecting %i background point based on %s method.", n, method )) - - # Apparently this breaks if there are a lot of NA values.. using a different version for now - if (0) { - # This version has issues sampling rasters with a lot (or mostly) NA value cells - backgr <- terra::spatSample(sum_na_layer, - size = n, - method="random", - replace = FALSE, - na.rm = T, - xy = TRUE, - as.points = FALSE, - values = F) - }else{ - # cells only retrieves non-NA cells - backgr <- sample(cells(sum_na_layer), n) - backgr <- xyFromCell(sum_na_layer,backgr) - - } - } else if (any(method == "thickening")) { - - message(sprintf("Selecting %i background point based on %s method.", n, method )) - - obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors)) - - if (is.null(width_buffer)) { - width_buffer <- mean(terra::distance(obs_vec)) - } else { - width_buffer <- as.numeric(width_buffer) - } - buf <- terra::buffer(obs_vec, width_buffer, quadsegs = 10) - buf_r <- !is.na(terra::rasterize(buf[1], sum_na_layer)) - for (i in 2:nrow(buf)) { - buf_r <- buf_r + !is.na(terra::rasterize(buf[i], sum_na_layer)) - } - buf_r <- terra::mask(buf_r, sum_na_layer) - backgr <- terra::spatSample(buf_r, - size = n, - method = "weighted", - replace = FALSE, - na.rm = T, - xy = TRUE, - as.points = FALSE, - values = F) - - } else if (any(method == "biased")) { - - if (!is.null(mask)) density_bias <- fast_crop(density_bias, mask) - - backgr <- terra::spatSample(density_bias, - size = n, - method = "weighted", - replace = FALSE, - na.rm = T, - xy = TRUE, - as.points = FALSE, - values = F) - - } else if (method == "inclusion_buffer") { - - obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors)) - - if (is.null(dist_buffer)) { - - message("Buffer distance not provided. Using the 95% quantile - of the minimum distance between each point.") - dist_buffer <- calculate_dist_buffer(obs |> dplyr::select(lon, lat)) - message(sprintf("Buffer distance: %s (unit of projection)", dist_buffer)) - - } - - # Creating the buffer - - buffer_shape <- rgeos::gBuffer(spgeom = as(obs_vec, "Spatial"), - byid = FALSE, width = dist_buffer) - - # crops the predictors to that shape to rasterize - sum_na_layer <- fast_crop(sum_na_layer, buffer_shape) - - message(sprintf("Trying selecting %i background point based on %s method.", n ,method )) - backgr <- terra::spatSample(sum_na_layer, - size = n, - method="random", - replace=FALSE, - na.rm=T, - xy=TRUE, - as.points=FALSE, - values=F) - - } else if (grepl("^weighted", method)) { - ## Set heatmap cells to NA if they are NA in sum_na_layer - ## Assumes the last layer of predictors is the heatmap, and that it is counts! - values(predictors[[dim(predictors)[3]]])[which(is.na(values(sum_na_layer)))] <- NA - tgb_weights <- predictors[[dim(predictors)[3]]] - tgb_weights <- tgb_weights/sum(values(tgb_weights), na.rm = T) - - #backgr <- sample(cells(tgb_weights), n, prob = unlist(extract(tgb_weights, cells(tgb_weights)))) - #backgr <- xyFromCell(tgb_weights, backgr) - - # Sampling using density as probabilities - # We still run into NA values in the weights, which we should exclude - weightsVector <- unlist(extract(tgb_weights, cells(tgb_weights))) - names(weightsVector) <- cells(tgb_weights) - weightsVector <- na.omit(weightsVector) - - if(length(weightsVector) < n){ - message( - "Number of background-points exceeds number of cell. ", - ncellr, - " background-points will be sampled" - ) - backgr <- names(weightsVector) - - }else{ - backgr <- sample(names(weightsVector), n, prob = weightsVector) - - } - - backgr <- xyFromCell(tgb_weights, as.numeric(backgr)) - } - } - message(sprintf("%s selected", nrow(backgr))) - - #species <- unique(obs$scientific_name) This didn't work if there are more than one equivalent species names - species <- obs$scientific_name[1] - backgr <- dplyr::bind_cols(id = 1:nrow(backgr), - scientific_name = species, - backgr |> data.frame()) |> - setNames(c("id", "scientific_name", "lon", "lat")) - - return(backgr) -} - - -calculate_dist_buffer <- function(obs, n = 1000) { - #Uses the first 1000 points (randomly sampled) to create buffers and distances - if (nrow(obs) > n) { - nb_buffer_point <- n - } else { - nb_buffer_point <- nrow(obs) - 1 - } - - sample_locations <- obs[sample(c(1:nrow(obs)), - size = nb_buffer_point, replace = FALSE), ] - #Uses the 95% quantile of the minimum distance between each point - distance <- raster::pointDistance(sample_locations, lonlat = FALSE) - mindist <- c() - for (q in 1:ncol(distance)) { - distance_zero <- distance[which(distance[, q] > 0), q] - mindist <- c(mindist, min(distance_zero)) - } - dist_buffer <- 2 * stats::quantile(mindist, 0.95) - return(dist_buffer) -} + + +#' @title Create background points +#' +#' @name create_background +#' @param predictors spat raster, containing the predictor variables +#' @param mask, spat vector, mask to apply to the predictors. +#' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted) +#' @param n integer, number of background points to select. +#' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer" +#' @param width_buffer int, buffer width around observations. Used with method == "inclusion_buffer" +#' @param species string, species name. +#' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods +#' @return spatial points +#' @export + +create_background <- function( + predictors, + mask = NULL, + method = c("random","weighted_raster","unweighted_raster","inclusion_buffer","biased","thickening"), + n = 10000, + obs = NULL, + density_bias = NULL, + width_buffer = NULL, + raster = NULL) { + + proj <- terra::crs(predictors, proj = T) + + ## New method: If we use raster, we re-project our raster and add it as an additional layer + if (grepl("raster", method)) { + if (is.null(raster)) stop(paste("No raster included with method:", method)) + + # Bilinear interpolation when projecting in this manner + # Exclude 0s, since we don't want to sample areas with no sightings + raster[raster <= 0] <- NA + + add(predictors) <- raster + + } + + if (!is.null(mask)) { + predictors <- fast_crop(predictors, mask) + } + + sum_na_layer <- terra::tapp(predictors, index = rep(1, terra::nlyr(predictors)), fun = sum, na.rm = F) + + ncellr <- terra::global(!is.na(sum_na_layer), sum) + + if (ncellr < n) { + message( + "Number of background-points exceeds number of cell. ", + ncellr, + " background-points will be sampled" + ) + backgr <- terra::as.data.frame(predictors, na.rm = TRUE, xy = TRUE) |> dplyr::select(x, y) + + } else { + + if (any(method == "random" | grepl("^unweighted", method))) { + + # all the cells have the same probability to be selected + message(sprintf("Selecting %i background point based on %s method.", n, method )) + + # Apparently this breaks if there are a lot of NA values.. using a different version for now + if (0) { + # This version has issues sampling rasters with a lot (or mostly) NA value cells + backgr <- terra::spatSample(sum_na_layer, + size = n, + method="random", + replace = FALSE, + na.rm = T, + xy = TRUE, + as.points = FALSE, + values = F) + }else{ + # cells only retrieves non-NA cells + backgr <- sample(cells(sum_na_layer), n) + backgr <- xyFromCell(sum_na_layer,backgr) + + } + } else if (any(method == "thickening")) { + + message(sprintf("Selecting %i background point based on %s method.", n, method )) + + obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors)) + + if (is.null(width_buffer)) { + width_buffer <- mean(terra::distance(obs_vec)) + } else { + width_buffer <- as.numeric(width_buffer) + } + buf <- terra::buffer(obs_vec, width_buffer, quadsegs = 10) + buf_r <- !is.na(terra::rasterize(buf[1], sum_na_layer)) + for (i in 2:nrow(buf)) { + buf_r <- buf_r + !is.na(terra::rasterize(buf[i], sum_na_layer)) + } + buf_r <- terra::mask(buf_r, sum_na_layer) + backgr <- terra::spatSample(buf_r, + size = n, + method = "weighted", + replace = FALSE, + na.rm = T, + xy = TRUE, + as.points = FALSE, + values = F) + + } else if (any(method == "biased")) { + + if (!is.null(mask)) density_bias <- fast_crop(density_bias, mask) + + backgr <- terra::spatSample(density_bias, + size = n, + method = "weighted", + replace = FALSE, + na.rm = T, + xy = TRUE, + as.points = FALSE, + values = F) + + } else if (method == "inclusion_buffer") { + + obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors)) + + if (is.null(dist_buffer)) { + + message("Buffer distance not provided. Using the 95% quantile + of the minimum distance between each point.") + dist_buffer <- calculate_dist_buffer(obs |> dplyr::select(lon, lat)) + message(sprintf("Buffer distance: %s (unit of projection)", dist_buffer)) + + } + + # Creating the buffer + + buffer_shape <- rgeos::gBuffer(spgeom = as(obs_vec, "Spatial"), + byid = FALSE, width = dist_buffer) + + # crops the predictors to that shape to rasterize + sum_na_layer <- fast_crop(sum_na_layer, buffer_shape) + + message(sprintf("Trying selecting %i background point based on %s method.", n ,method )) + backgr <- terra::spatSample(sum_na_layer, + size = n, + method="random", + replace=FALSE, + na.rm=T, + xy=TRUE, + as.points=FALSE, + values=F) + + } else if (grepl("^weighted", method)) { + ## Set heatmap cells to NA if they are NA in sum_na_layer + ## Assumes the last layer of predictors is the heatmap, and that it is counts! + values(predictors[[dim(predictors)[3]]])[which(is.na(values(sum_na_layer)))] <- NA + tgb_weights <- predictors[[dim(predictors)[3]]] + tgb_weights <- tgb_weights/sum(values(tgb_weights), na.rm = T) + + #backgr <- sample(cells(tgb_weights), n, prob = unlist(extract(tgb_weights, cells(tgb_weights)))) + #backgr <- xyFromCell(tgb_weights, backgr) + + # Sampling using density as probabilities + # We still run into NA values in the weights, which we should exclude + weightsVector <- unlist(extract(tgb_weights, cells(tgb_weights))) + names(weightsVector) <- cells(tgb_weights) + weightsVector <- na.omit(weightsVector) + + if(length(weightsVector) < n){ + message( + "Number of background-points exceeds number of cell. ", + ncellr, + " background-points will be sampled" + ) + backgr <- names(weightsVector) + + }else{ + backgr <- sample(names(weightsVector), n, prob = weightsVector) + + } + + backgr <- xyFromCell(tgb_weights, as.numeric(backgr)) + } + } + message(sprintf("%s selected", nrow(backgr))) + + #species <- unique(obs$scientific_name) This didn't work if there are more than one equivalent species names + species <- obs$scientific_name[1] + backgr <- dplyr::bind_cols(id = 1:nrow(backgr), + scientific_name = species, + backgr |> data.frame()) |> + setNames(c("id", "scientific_name", "lon", "lat")) + + return(backgr) +} + + +calculate_dist_buffer <- function(obs, n = 1000) { + #Uses the first 1000 points (randomly sampled) to create buffers and distances + if (nrow(obs) > n) { + nb_buffer_point <- n + } else { + nb_buffer_point <- nrow(obs) - 1 + } + + sample_locations <- obs[sample(c(1:nrow(obs)), + size = nb_buffer_point, replace = FALSE), ] + #Uses the 95% quantile of the minimum distance between each point + distance <- raster::pointDistance(sample_locations, lonlat = FALSE) + mindist <- c() + for (q in 1:ncol(distance)) { + distance_zero <- distance[which(distance[, q] > 0), q] + mindist <- c(mindist, min(distance_zero)) + } + dist_buffer <- 2 * stats::quantile(mindist, 0.95) + return(dist_buffer) +} From ea7ec609ee5ff181e2ba129f243b9a96ac2e5032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Fri, 8 Sep 2023 15:41:53 -0400 Subject: [PATCH 02/10] check argument value to make sure it goes to a specific place --- scripts/SDM/selectBackgroundFunc.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R index 875d3b65..5f560ff6 100644 --- a/scripts/SDM/selectBackgroundFunc.R +++ b/scripts/SDM/selectBackgroundFunc.R @@ -24,6 +24,8 @@ create_background <- function( width_buffer = NULL, raster = NULL) { + method <- match.arg(method) + proj <- terra::crs(predictors, proj = T) ## New method: If we use raster, we re-project our raster and add it as an additional layer From 59c1dde06ff07cec4abb8a8c9ad4cd2d6ed3f57c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Wed, 13 Sep 2023 16:26:49 -0400 Subject: [PATCH 03/10] slightly better documentation for create_background --- scripts/SDM/selectBackgroundFunc.R | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R index 5f560ff6..d2f2cdec 100644 --- a/scripts/SDM/selectBackgroundFunc.R +++ b/scripts/SDM/selectBackgroundFunc.R @@ -1,15 +1,21 @@ #' @title Create background points +#' +#' @description +#' Generates background points using any of the six available methods. +#' +#' @details +#' When `method = "random"`, background points are randomly sampled throughout the whole study extent. When `method = "weighted_raster"`, background points are sampled in proportion to the number of observations of a target group in an observation density raster. When `method = "unweighted_raster"`, background points are sampled only in cells where there are observations from a target group. With `method = "inclusion_buffer"`, background points are sampled within a buffer around observations (to be confirmed...). With `method = "thickening"`, background points are sampled in proportion the local density of observations by sampling in a buffer around each observation (to be confirmed...). Finally, when `method = "biased"`, a `density_bias` raster representing the effort is given and background points are sampled in proportion to this raster (to be confirmed...). #' #' @name create_background -#' @param predictors spat raster, containing the predictor variables -#' @param mask, spat vector, mask to apply to the predictors. -#' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted) -#' @param n integer, number of background points to select. -#' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer" -#' @param width_buffer int, buffer width around observations. Used with method == "inclusion_buffer" -#' @param species string, species name. +#' @param predictors SpatRasterr, containing the predictor variables +#' @param mask SpatVector, mask to apply to the predictors. +#' @param method one of "random","weighted_raster","unweighted_raster","inclusion_buffer","biased","thickening". +#' @param n integer, number of background points to sample. +#' @param obs data.frame, containing the observations. Used with "thickening" or "inclusion_buffer". +#' @param density_bias SpatRaster giving an effort/bias surface from which background points are sampled +#' @param width_buffer int, buffer width around observations. Used with "thickening" or "inclusion_buffer". #' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods #' @return spatial points #' @export @@ -196,7 +202,7 @@ create_background <- function( calculate_dist_buffer <- function(obs, n = 1000) { - #Uses the first 1000 points (randomly sampled) to create buffers and distances + # uses the first 1000 points (randomly sampled) to create buffers and distances if (nrow(obs) > n) { nb_buffer_point <- n } else { From bd027d2544fa2198b57c702b953b7a579f0f7fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Fri, 29 Sep 2023 12:03:28 -0400 Subject: [PATCH 04/10] as.data.frame does not check names which avoids the conversion of - in . in col names coming from io and do not convert col names when reading data --- scripts/SDM/runMaxent.R | 2 +- scripts/SDM/runMaxentFunc.R | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/SDM/runMaxent.R b/scripts/SDM/runMaxent.R index aa24adbf..339ed95f 100644 --- a/scripts/SDM/runMaxent.R +++ b/scripts/SDM/runMaxent.R @@ -31,7 +31,7 @@ print("Inputs : ") print(input) -presence_background <- read.table(file = input$presence_background, sep = '\t', header = TRUE) +presence_background <- read.table(file = input$presence_background, sep = '\t', header = TRUE, check.names = FALSE) predictors <- terra::rast(unlist(input$predictors)) mod_tuning <- run_maxent(presence_background, with_raster = F, # can be set to F to speed up diff --git a/scripts/SDM/runMaxentFunc.R b/scripts/SDM/runMaxentFunc.R index 5b35e5a9..1ce92af1 100644 --- a/scripts/SDM/runMaxentFunc.R +++ b/scripts/SDM/runMaxentFunc.R @@ -22,8 +22,8 @@ run_maxent <- function(presence.bg, with_raster = F, parallelType = "doParallel" ) { - presence <- presence.bg |> dplyr::filter(pa == 1) |> data.frame() - background <- presence.bg |> dplyr::filter(pa == 0) |> data.frame() + presence <- presence.bg |> dplyr::filter(pa == 1) |> as.data.frame() + background <- presence.bg |> dplyr::filter(pa == 0) |> as.data.frame() if (with_raster) { ENMmodel <- ENMeval::ENMevaluate(occs = presence[, c("lon", "lat")], @@ -259,8 +259,8 @@ predict_maxent <- function(presence_background, # We calculate the prediction with the whole dataset - presence <- presence_background |> dplyr::filter(pa == 1) |> data.frame() - background <- presence_background |> dplyr::filter(pa == 0) |> data.frame() + presence <- presence_background |> dplyr::filter(pa == 1) |> as.data.frame() + background <- presence_background |> dplyr::filter(pa == 0) |> as.data.frame() mod_tuning <- ENMeval::ENMevaluate(occs = presence[, c("lon", "lat", layers)], @@ -304,8 +304,8 @@ runs <- c("run_1") backg_test <- backgr[bg.grp == g, ] presence_bg_train <- group.all[which(group.all[,1] == g),] - presence <- presence_bg_train |> dplyr::filter(pa == 1) |> data.frame() - background <- presence_bg_train |> dplyr::filter(pa == 0) |> data.frame() + presence <- presence_bg_train |> dplyr::filter(pa == 1) |> as.data.frame() + background <- presence_bg_train |> dplyr::filter(pa == 0) |> as.data.frame() mod_tuning <- ENMeval::ENMevaluate(occs = presence[, c("lon", "lat", layers)], From a47fb13ef92f70db88078aca1ff0c7cfc03fcd20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Mon, 2 Oct 2023 16:50:06 -0400 Subject: [PATCH 05/10] some transition to terra (started because raster also converts - to . in variable names) --- scripts/SDM/runMaxent.R | 10 +++++----- scripts/SDM/runMaxentFunc.R | 18 ++++++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/scripts/SDM/runMaxent.R b/scripts/SDM/runMaxent.R index 339ed95f..ef16bb20 100644 --- a/scripts/SDM/runMaxent.R +++ b/scripts/SDM/runMaxent.R @@ -52,7 +52,7 @@ mod_tuning <- run_maxent(presence_background, res_tuning <- mod_tuning@results tuned_param <- select_param(res_tuning, method = input$method_select_params, list = T) -predictors <- raster::stack(predictors) +#predictors <- raster::stack(predictors) sdms <- predict_maxent(presence_background, algorithm = "maxent.jar", @@ -73,17 +73,17 @@ names(sdm_pred) <- "prediction" sdm_runs <- sdms[["pred_runs"]] pred.output <- file.path(outputFolder, "sdm_pred.tif") -runs.output <- paste0(outputFolder,"/sdm_runs_", 1:nlayers(sdm_runs), ".tif") +runs.output <- paste0(outputFolder,"/sdm_runs_", 1:nlyr(sdm_runs), ".tif") #runs.output <- file.path(outputFolder, "sdm_runs.tif") -sdm_pred<-project(rast(sdm_pred),crs(input$proj)) ##Temporary fix while maxent transitions to terra +sdm_pred<-project(sdm_pred,crs(input$proj)) ##Temporary fix while maxent transitions to terra terra::writeRaster(x = sdm_pred, filename = pred.output, filetype = "COG", wopt= list(gdal=c("COMPRESS=DEFLATE")), overwrite = TRUE) -for (i in 1:nlayers(sdm_runs)){ - thisrun<-project(rast(sdm_runs[[i]]),crs(input$proj)) ##Temporary fix while maxent transitions to terra +for (i in 1:nlyr(sdm_runs)){ + thisrun<-project(sdm_runs[[i]],crs(input$proj)) ##Temporary fix while maxent transitions to terra terra::writeRaster(x = thisrun, filename = file.path(outputFolder, paste0("/sdm_runs_", i, ".tif")), filetype = "COG", diff --git a/scripts/SDM/runMaxentFunc.R b/scripts/SDM/runMaxentFunc.R index 1ce92af1..70a89b56 100644 --- a/scripts/SDM/runMaxentFunc.R +++ b/scripts/SDM/runMaxentFunc.R @@ -322,12 +322,18 @@ runs <- c("run_1") pred_pres <- dismo::predict(mod_tuning@models[[tuned_param]], predictors, args = sprintf("outputformat=%s", type)) - if (inherits(pred_runs, "RasterLayer") || inherits(pred_runs, "RasterStack")) { - pred_runs <- raster::stack(pred_runs, pred_pres) - - } else { - pred_runs <- pred_pres - } + #if (inherits(pred_runs, "RasterLayer") || inherits(pred_runs, "RasterStack")) { + # pred_runs <- raster::stack(pred_runs, pred_pres) + #} else { + # pred_runs <- pred_pres + #} + if (is.null(pred_runs)) { + pred_runs <- pred_pres + } else { + pred_runs <- c(pred_runs, pred_pres) + } + + } From 1596a529283d2fc65b2dae3fac7e161d3b4047a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Tue, 3 Oct 2023 14:57:24 -0400 Subject: [PATCH 06/10] changed some defaults for faster pipeline runs and more predictors --- pipelines/SDM/SDM_maxEnt.json | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json index a9f2cec4..0fd939d9 100644 --- a/pipelines/SDM/SDM_maxEnt.json +++ b/pipelines/SDM/SDM_maxEnt.json @@ -366,7 +366,6 @@ "data": { "type": "text[]", "value": [ - "L", "LQ", "LQHP" ] @@ -384,8 +383,7 @@ "type": "float[]", "value": [ 0.5, - 1, - 2 + 1 ] } }, @@ -852,7 +850,7 @@ "checkerboard1", "checkerboard2" ], - "example": "block" + "example": "randomkfold" }, "data>loadFromStac.yml@119|stac_url": { "description": "URL of the Stac catalog", @@ -865,15 +863,19 @@ "label": "collections_items", "type": "text[]", "example": [ - "chelsa-clim|bio1", - "chelsa-clim|bio2" + "chelsa-clim|bio1", + "chelsa-clim|bio7", + "chelsa-clim|bio12", + "earthenv_topography|elevation_median", + "earthenv_landcover|class_3", + "earthenv_landcover|class_12" ] }, "data>loadFromStac.yml@119|spatial_res": { "description": "Integer, spatial resolution of the rasters", "label": "spatial resolution", "type": "int", - "example": 1000 + "example": 5000 }, "data>loadFromStac.yml@119|mask": { "description": "Shapefile, used to mask the output rasters", From 50d366986c4e3b5a07fa44957782408996e64cb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Tue, 3 Oct 2023 16:25:57 -0400 Subject: [PATCH 07/10] default back to block --- pipelines/SDM/SDM_maxEnt.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json index 0fd939d9..8e494161 100644 --- a/pipelines/SDM/SDM_maxEnt.json +++ b/pipelines/SDM/SDM_maxEnt.json @@ -850,7 +850,7 @@ "checkerboard1", "checkerboard2" ], - "example": "randomkfold" + "example": "block" }, "data>loadFromStac.yml@119|stac_url": { "description": "URL of the Stac catalog", From 6c58e74400c1b1857c17cc1a5438120533a8cc5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Wed, 4 Oct 2023 11:39:52 -0400 Subject: [PATCH 08/10] replaced with a simplified version and added some options that were unavailable/not working --- scripts/SDM/selectBackgroundFunc.R | 237 +++++++++++------------------ 1 file changed, 87 insertions(+), 150 deletions(-) diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R index f0dc9cf4..d9690571 100644 --- a/scripts/SDM/selectBackgroundFunc.R +++ b/scripts/SDM/selectBackgroundFunc.R @@ -8,6 +8,7 @@ #' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted) #' @param n integer, number of background points to select. #' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer" +#' @param density_bias spatRaster of densities for method biased (ignored in the pipeline at the moment) #' @param width_buffer int, buffer width around observations. Used with method == "inclusion_buffer" #' @param species string, species name. #' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods @@ -17,182 +18,118 @@ create_background <- function( predictors, mask = NULL, - method = "random", + method = c("random","weighted_raster","unweighted_raster","inclusion_buffer","biased","thickening"), n = 10000, obs = NULL, density_bias = NULL, width_buffer = NULL, raster = NULL) { - proj <- terra::crs(predictors, proj = T) + method <- match.arg(method) + + proj <- terra::crs(predictors) ## New method: If we use raster, we re-project our raster and add it as an additional layer if (grepl("raster", method)) { if (is.null(raster)) stop(paste("No raster included with method:", method)) - - # Bilinear interpolation when projecting in this manner - # Exclude 0s, since we don't want to sample areas with no sightings - raster[raster <= 0] <- NA - - add(predictors) <- raster - + } + + if (method == "biased") { + if (is.null(density_bias)) stop(paste("No density_bias included with method:", method)) } if (!is.null(mask)) { predictors <- fast_crop(predictors, mask) } - sum_na_layer <- terra::tapp(predictors, index = rep(1, terra::nlyr(predictors)), fun = sum, na.rm = F) - - ncellr <- terra::global(!is.na(sum_na_layer), sum) - - if (ncellr < n) { - message( - "Number of background-points exceeds number of cell. ", - ncellr, - " background-points will be sampled" - ) - backgr <- terra::as.data.frame(predictors, na.rm = TRUE, xy = TRUE) |> dplyr::select(x, y) + if (method %in% c("inclusion_buffer","thickening")){ + ### Not sure why the following buffer width method was originally used. package FNN could be faster for searching nearest neighbours. + #message("Argument width_buffer not provided. Using the 95% quantile + # of the nearest neighbour distance that is not in the same location.") + # + #if(nrow(obs) > 1000){ + # sampled_obs <- sample(1:nrow(obs), min(1000, nrow(obs))) + # message("Only 1000 randomly sampled locations are used to determine the width_buffer.") + #}else{ + # sampled_obs <- 1:nrow(obs) + #} + #width_buffer<-quantile(st_distance(obs[sampled_obs,]),0.95) + # + #width_buffer<-obs[sampled_obs,] |> + # st_distance() |> + # apply(2,function(x){min(x[x>0])}) |> + # quantile(0.95) + # + #message(sprintf("width-buffer used is %s (in the units of the crs)", width_buffer)) - } else { + if (is.null(obs)){ + stop(paste("No obs included with method:", method)) + } - if (any(method == "random" | grepl("^unweighted", method))) { - - # all the cells have the same probability to be selected - message(sprintf("Selecting %i background point based on %s method.", n, method )) - - # Apparently this breaks if there are a lot of NA values.. using a different version for now - if (0) { - # This version has issues sampling rasters with a lot (or mostly) NA value cells - backgr <- terra::spatSample(sum_na_layer, - size = n, - method="random", - replace = FALSE, - na.rm = T, - xy = TRUE, - as.points = FALSE, - values = F) - }else{ - # cells only retrieves non-NA cells - backgr <- sample(cells(sum_na_layer), n) - backgr <- xyFromCell(sum_na_layer,backgr) - - } - } else if (any(method == "thickening")) { - - message(sprintf("Selecting %i background point based on %s method.", n, method )) - - obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors)) - - if (is.null(width_buffer)) { - width_buffer <- mean(terra::distance(obs_vec)) - } else { - width_buffer <- as.numeric(width_buffer) - } - buf <- terra::buffer(obs_vec, width_buffer, quadsegs = 10) - buf_r <- !is.na(terra::rasterize(buf[1], sum_na_layer)) - for (i in 2:nrow(buf)) { - buf_r <- buf_r + !is.na(terra::rasterize(buf[i], sum_na_layer)) - } - buf_r <- terra::mask(buf_r, sum_na_layer) - backgr <- terra::spatSample(buf_r, - size = n, - method = "weighted", - replace = FALSE, - na.rm = T, - xy = TRUE, - as.points = FALSE, - values = F) - - } else if (any(method == "biased")) { - - if (!is.null(mask)) density_bias <- fast_crop(density_bias, mask) - - backgr <- terra::spatSample(density_bias, - size = n, - method = "weighted", - replace = FALSE, - na.rm = T, - xy = TRUE, - as.points = FALSE, - values = F) - - } else if (method == "inclusion_buffer") { - - obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors)) - - if (is.null(dist_buffer)) { - - message("Buffer distance not provided. Using the 95% quantile - of the minimum distance between each point.") - dist_buffer <- calculate_dist_buffer(obs |> dplyr::select(lon, lat)) - message(sprintf("Buffer distance: %s (unit of projection)", dist_buffer)) - - } - - # Creating the buffer - - buffer_shape <- rgeos::gBuffer(spgeom = as(obs_vec, "Spatial"), - byid = FALSE, width = dist_buffer) - - # crops the predictors to that shape to rasterize - sum_na_layer <- fast_crop(sum_na_layer, buffer_shape) - - message(sprintf("Trying selecting %i background point based on %s method.", n ,method )) - backgr <- terra::spatSample(sum_na_layer, - size = n, - method="random", - replace=FALSE, - na.rm=T, - xy=TRUE, - as.points=FALSE, - values=F) - - } else if (grepl("^weighted", method)) { - ## Set heatmap cells to NA if they are NA in sum_na_layer - ## Assumes the last layer of predictors is the heatmap, and that it is counts! - values(predictors[[dim(predictors)[3]]])[which(is.na(values(sum_na_layer)))] <- NA - tgb_weights <- predictors[[dim(predictors)[3]]] - tgb_weights <- tgb_weights/sum(values(tgb_weights), na.rm = T) - - #backgr <- sample(cells(tgb_weights), n, prob = unlist(extract(tgb_weights, cells(tgb_weights)))) - #backgr <- xyFromCell(tgb_weights, backgr) - - # Sampling using density as probabilities - # We still run into NA values in the weights, which we should exclude - weightsVector <- unlist(extract(tgb_weights, cells(tgb_weights))) - names(weightsVector) <- cells(tgb_weights) - weightsVector <- na.omit(weightsVector) - - if(length(weightsVector) < n){ - message( - "Number of background-points exceeds number of cell. ", - ncellr, - " background-points will be sampled" - ) - backgr <- names(weightsVector) + obs <- st_as_sf(obs, coords=c("lon", "lat"), crs = proj) - }else{ - backgr <- sample(names(weightsVector), n, prob = weightsVector) - - } - - backgr <- xyFromCell(tgb_weights, as.numeric(backgr)) + if(is.null(width_buffer)){ + width_buffer <- 0.1 * max(diff(st_bbox(obs)[c(1, 3)]),diff(st_bbox(obs)[c(2, 4)])) + message(sprintf("width_buffer used is 10%% of the largest bounding box dimension (%s in the units of the crs)", round(width_buffer))) } + } - message(sprintf("%s selected", nrow(backgr))) - #species <- unique(obs$scientific_name) This didn't work if there are more than one equivalent species names - species <- obs$scientific_name[1] - backgr <- dplyr::bind_cols(id = 1:nrow(backgr), - scientific_name = species, - backgr |> data.frame()) |> - setNames(c("id", "scientific_name", "lon", "lat")) + backgr<-switch(method, + random = { + terra::spatSample(predictors[[1]], size = n, method = "random", replace = TRUE, values = FALSE, as.points = TRUE) + }, + weighted_raster = { + terra::spatSample(raster, size = n, method = "weights", replace = TRUE, values = FALSE, as.points = TRUE) + }, + unweighted_raster = { + terra::spatSample(raster > 0, size = n, method = "weights", replace = TRUE, values = FALSE, as.points = TRUE) + }, + inclusion_buffer = { # this thing can take a while if there are many observations + obs |> + st_buffer(dist = width_buffer, nQuadSegs = 10) |> + st_union() |> + st_sample(n) |> + st_as_sf() + }, + biased = { # not sure how this method differs from weighted_raster, ignored at the moment as an option + terra::spatSample(density_bias, size = n, method = "weights", replace = TRUE, values = FALSE, as.points = TRUE) + }, + thickening = { + # There is probably a faster way using a radius and angle from each observations. Right now, rasterizing the buffers and sampling from the raster seems much faster than sampling within the buffers using sf (commented version) + #backgr<-obs |> + # st_buffer(dist = width_buffer, nQuadSegs = 10) |> + # st_sample(size = rep(ceiling(n / nrow(obs)),nrow(obs)), by_polygon = FALSE) |> + # st_as_sf() + #backgr[sample(1:nrow(backgr),n),] + terra::spatSample( + rasterize(st_buffer(obs, dist = width_buffer, nQuadSegs = 10), rast(predictors[[1]]), fun = sum), + size = n, + method = "weights", + replace = TRUE, + values = FALSE, + as.points = TRUE + ) + } + ) + backgr <- backgr[,-1] # temporary fix for https://github.com/rspatial/terra/issues/1275 + backgr <- st_as_sf(backgr) |> + st_coordinates() |> + data.frame(id = 1:nrow(backgr), scientific_name = obs$scientific_name[1], lon = _) |> + setNames(c("id","scientific_name","lon","lat")) + backgr - return(backgr) } + + + + + + + + calculate_dist_buffer <- function(obs, n = 1000) { #Uses the first 1000 points (randomly sampled) to create buffers and distances if (nrow(obs) > n) { From f95c695a19ff636466f408a72d961eb2762fa07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= Date: Wed, 4 Oct 2023 11:41:07 -0400 Subject: [PATCH 09/10] added background thickening as an option --- pipelines/SDM/SDM_maxEnt.json | 3 ++- scripts/SDM/selectBackground.yml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json index 8e494161..bbf2c6af 100644 --- a/pipelines/SDM/SDM_maxEnt.json +++ b/pipelines/SDM/SDM_maxEnt.json @@ -761,7 +761,8 @@ "random", "inclusion_buffer", "weighted_raster", - "unweighted_raster" + "unweighted_raster", + "thickening" ], "example": "random" }, diff --git a/scripts/SDM/selectBackground.yml b/scripts/SDM/selectBackground.yml index 1ebc31b5..9bd0e30c 100644 --- a/scripts/SDM/selectBackground.yml +++ b/scripts/SDM/selectBackground.yml @@ -26,6 +26,7 @@ inputs: - inclusion_buffer - weighted_raster - unweighted_raster + - thickening example: random n_background: label: number of background points From 113bad475b5a1b1c870d300705555f1b479feb88 Mon Sep 17 00:00:00 2001 From: frousseu Date: Wed, 4 Oct 2023 13:25:27 -0400 Subject: [PATCH 10/10] added some metada to pipeline --- pipelines/SDM/SDM_maxEnt.json | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json index bbf2c6af..5323f76e 100644 --- a/pipelines/SDM/SDM_maxEnt.json +++ b/pipelines/SDM/SDM_maxEnt.json @@ -864,12 +864,12 @@ "label": "collections_items", "type": "text[]", "example": [ - "chelsa-clim|bio1", - "chelsa-clim|bio7", - "chelsa-clim|bio12", - "earthenv_topography|elevation_median", - "earthenv_landcover|class_3", - "earthenv_landcover|class_12" + "chelsa-clim|bio1", + "chelsa-clim|bio7", + "chelsa-clim|bio12", + "earthenv_topography|elevation_median", + "earthenv_landcover|class_3", + "earthenv_landcover|class_12" ] }, "data>loadFromStac.yml@119|spatial_res": { @@ -899,5 +899,17 @@ 1 ] } + }, + "metadata": { + "name": "Maxent SDM pipeline", + "description": "A pipeline to produce species distribution models using Maxent", + "author": [ + { + "name": "Sarah Valentin", + "identifier": "https://orcid.org/0000-0002-9028-681X" + } + ], + "license": null, + "external_link": null } } \ No newline at end of file