From 6702cacd1de0bfdeb407a5fb49b86f58ff27d3df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Fri, 8 Sep 2023 15:28:25 -0400
Subject: [PATCH 01/10] be explicit about the possible methods and exact since
 there is some fuzzy matching for the method names

---
 scripts/SDM/selectBackgroundFunc.R | 430 ++++++++++++++---------------
 1 file changed, 215 insertions(+), 215 deletions(-)

diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R
index eea36c95..875d3b65 100644
--- a/scripts/SDM/selectBackgroundFunc.R
+++ b/scripts/SDM/selectBackgroundFunc.R
@@ -1,215 +1,215 @@
-
-
-#' @title Create background points
-#'
-#' @name create_background
-#' @param predictors spat raster, containing the predictor variables
-#' @param mask, spat vector, mask to apply to the predictors.
-#' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted)
-#' @param n integer, number of background points to select.
-#' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer"
-#' @param width_buffer int, buffer width around observations. Used with method ==  "inclusion_buffer"
-#' @param species string, species name.
-#' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods
-#' @return spatial points
-#' @export
-
-create_background <- function(
-    predictors,
-    mask = NULL,
-    method = "random",
-    n = 10000,  
-    obs = NULL,
-    density_bias = NULL,
-    width_buffer = NULL,
-    raster = NULL) {
-  
-  proj <- terra::crs(predictors, proj = T)
-  
-  ## New method: If we use raster, we re-project our raster and add it as an additional layer
-  if (grepl("raster", method)) {
-    if (is.null(raster)) stop(paste("No raster included with method:", method))
-    
-    # Bilinear interpolation when projecting in this manner
-    # Exclude 0s, since we don't want to sample areas with no sightings
-    raster[raster <= 0] <- NA
-    
-    add(predictors) <- raster
-    
-  }
-  
-  if (!is.null(mask)) {
-    predictors <- fast_crop(predictors, mask)
-  }
-  
-  sum_na_layer <-  terra::tapp(predictors, index = rep(1, terra::nlyr(predictors)), fun = sum, na.rm = F)
-  
-  ncellr <- terra::global(!is.na(sum_na_layer), sum)
-  
-  if (ncellr < n) {
-    message(
-      "Number of background-points exceeds number of cell. ",
-      ncellr,
-      " background-points will be sampled"
-    )
-    backgr <- terra::as.data.frame(predictors, na.rm = TRUE, xy = TRUE) |> dplyr::select(x, y)
-    
-  } else {
-    
-    if (any(method == "random" | grepl("^unweighted", method))) {
-      
-      # all the cells have the same probability to be selected
-      message(sprintf("Selecting %i background point based on %s method.", n, method  ))
-      
-      # Apparently this breaks if there are a lot of NA values.. using a different version for now
-      if (0) {
-        # This version has issues sampling rasters with a lot (or mostly) NA value cells
-        backgr <- terra::spatSample(sum_na_layer,
-                                    size = n,
-                                    method="random", 
-                                    replace = FALSE,
-                                    na.rm = T,
-                                    xy = TRUE, 
-                                    as.points = FALSE, 
-                                    values = F)
-      }else{
-        # cells only retrieves non-NA cells
-        backgr <- sample(cells(sum_na_layer), n)
-        backgr <- xyFromCell(sum_na_layer,backgr)
-        
-      }
-    } else if (any(method == "thickening")) {
-      
-      message(sprintf("Selecting %i background point based on %s method.", n, method  ))
-      
-      obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors))
-      
-      if (is.null(width_buffer)) {
-        width_buffer <- mean(terra::distance(obs_vec))
-      } else {
-        width_buffer <- as.numeric(width_buffer)
-      }
-      buf <- terra::buffer(obs_vec, width_buffer, quadsegs = 10)
-      buf_r <- !is.na(terra::rasterize(buf[1], sum_na_layer))
-      for (i in 2:nrow(buf)) {
-        buf_r <- buf_r + !is.na(terra::rasterize(buf[i], sum_na_layer))
-      }
-      buf_r <- terra::mask(buf_r, sum_na_layer)
-      backgr <- terra::spatSample(buf_r,
-                                  size = n,
-                                  method = "weighted", 
-                                  replace = FALSE,
-                                  na.rm = T,
-                                  xy = TRUE, 
-                                  as.points = FALSE, 
-                                  values = F)
-      
-    } else if (any(method == "biased")) {
-      
-      if (!is.null(mask)) density_bias <- fast_crop(density_bias, mask)
-      
-      backgr <- terra::spatSample(density_bias,
-                                  size = n,
-                                  method = "weighted", 
-                                  replace = FALSE,
-                                  na.rm = T,
-                                  xy = TRUE, 
-                                  as.points = FALSE, 
-                                  values = F)
-      
-    } else if (method == "inclusion_buffer") {
-      
-      obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors))
-      
-      if (is.null(dist_buffer)) {
-        
-        message("Buffer distance not provided. Using the 95% quantile 
-      of the minimum distance between each point.")
-        dist_buffer <- calculate_dist_buffer(obs |> dplyr::select(lon, lat))
-        message(sprintf("Buffer distance: %s (unit of projection)", dist_buffer))
-        
-      }
-      
-      # Creating the buffer
-      
-      buffer_shape <- rgeos::gBuffer(spgeom = as(obs_vec, "Spatial"),
-                                     byid = FALSE, width = dist_buffer)
-      
-      # crops the predictors to that shape to rasterize
-      sum_na_layer <- fast_crop(sum_na_layer,  buffer_shape)
-      
-      message(sprintf("Trying selecting %i background point based on %s method.", n ,method  ))
-      backgr <- terra::spatSample(sum_na_layer,
-                                  size = n, 
-                                  method="random",
-                                  replace=FALSE,
-                                  na.rm=T,
-                                  xy=TRUE,
-                                  as.points=FALSE, 
-                                  values=F)
-      
-    } else if (grepl("^weighted", method)) {
-      ## Set heatmap cells to NA if they are NA in sum_na_layer
-      ## Assumes the last layer of predictors is the heatmap, and that it is counts!
-      values(predictors[[dim(predictors)[3]]])[which(is.na(values(sum_na_layer)))] <- NA
-      tgb_weights <- predictors[[dim(predictors)[3]]]
-      tgb_weights <- tgb_weights/sum(values(tgb_weights), na.rm = T)
-      
-      #backgr <- sample(cells(tgb_weights), n, prob = unlist(extract(tgb_weights, cells(tgb_weights))))
-      #backgr <- xyFromCell(tgb_weights, backgr)
-
-      # Sampling using density as probabilities
-      # We still run into NA values in the weights, which we should exclude
-      weightsVector <- unlist(extract(tgb_weights, cells(tgb_weights)))
-      names(weightsVector) <- cells(tgb_weights)
-      weightsVector <- na.omit(weightsVector)
-
-      if(length(weightsVector) < n){
-        message(
-          "Number of background-points exceeds number of cell. ",
-          ncellr,
-          " background-points will be sampled"
-        )
-        backgr <- names(weightsVector)
-    
-      }else{
-        backgr <- sample(names(weightsVector), n, prob = weightsVector)
-
-      }
-
-      backgr <- xyFromCell(tgb_weights, as.numeric(backgr))
-    }
-  }
-  message(sprintf("%s selected", nrow(backgr)))
-  
-  #species <- unique(obs$scientific_name) This didn't work if there are more than one equivalent species names
-  species <- obs$scientific_name[1]
-  backgr <- dplyr::bind_cols(id = 1:nrow(backgr),
-                             scientific_name = species,
-                             backgr |> data.frame()) |>
-    setNames(c("id", "scientific_name", "lon", "lat"))
-  
-  return(backgr)
-}
-
-
-calculate_dist_buffer <- function(obs, n = 1000) {
-  #Uses the first 1000 points (randomly sampled) to create buffers and distances
-  if (nrow(obs) > n) {
-    nb_buffer_point <- n
-  } else {
-    nb_buffer_point <- nrow(obs) - 1
-  }
-  
-  sample_locations <- obs[sample(c(1:nrow(obs)),
-                                 size = nb_buffer_point, replace = FALSE), ]
-  #Uses the 95% quantile of the minimum distance between each point
-  distance <- raster::pointDistance(sample_locations, lonlat = FALSE)
-  mindist <- c()
-  for (q in 1:ncol(distance)) {
-    distance_zero <- distance[which(distance[, q] > 0), q]
-    mindist <- c(mindist, min(distance_zero))
-  }
-  dist_buffer <- 2 * stats::quantile(mindist, 0.95)
-  return(dist_buffer)
-}
+
+
+#' @title Create background points
+#'
+#' @name create_background
+#' @param predictors spat raster, containing the predictor variables
+#' @param mask, spat vector, mask to apply to the predictors.
+#' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted)
+#' @param n integer, number of background points to select.
+#' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer"
+#' @param width_buffer int, buffer width around observations. Used with method ==  "inclusion_buffer"
+#' @param species string, species name.
+#' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods
+#' @return spatial points
+#' @export
+
+create_background <- function(
+    predictors,
+    mask = NULL,
+    method = c("random","weighted_raster","unweighted_raster","inclusion_buffer","biased","thickening"),
+    n = 10000,  
+    obs = NULL,
+    density_bias = NULL,
+    width_buffer = NULL,
+    raster = NULL) {
+  
+  proj <- terra::crs(predictors, proj = T)
+  
+  ## New method: If we use raster, we re-project our raster and add it as an additional layer
+  if (grepl("raster", method)) {
+    if (is.null(raster)) stop(paste("No raster included with method:", method))
+    
+    # Bilinear interpolation when projecting in this manner
+    # Exclude 0s, since we don't want to sample areas with no sightings
+    raster[raster <= 0] <- NA
+    
+    add(predictors) <- raster
+    
+  }
+  
+  if (!is.null(mask)) {
+    predictors <- fast_crop(predictors, mask)
+  }
+  
+  sum_na_layer <-  terra::tapp(predictors, index = rep(1, terra::nlyr(predictors)), fun = sum, na.rm = F)
+  
+  ncellr <- terra::global(!is.na(sum_na_layer), sum)
+  
+  if (ncellr < n) {
+    message(
+      "Number of background-points exceeds number of cell. ",
+      ncellr,
+      " background-points will be sampled"
+    )
+    backgr <- terra::as.data.frame(predictors, na.rm = TRUE, xy = TRUE) |> dplyr::select(x, y)
+    
+  } else {
+    
+    if (any(method == "random" | grepl("^unweighted", method))) {
+      
+      # all the cells have the same probability to be selected
+      message(sprintf("Selecting %i background point based on %s method.", n, method  ))
+      
+      # Apparently this breaks if there are a lot of NA values.. using a different version for now
+      if (0) {
+        # This version has issues sampling rasters with a lot (or mostly) NA value cells
+        backgr <- terra::spatSample(sum_na_layer,
+                                    size = n,
+                                    method="random", 
+                                    replace = FALSE,
+                                    na.rm = T,
+                                    xy = TRUE, 
+                                    as.points = FALSE, 
+                                    values = F)
+      }else{
+        # cells only retrieves non-NA cells
+        backgr <- sample(cells(sum_na_layer), n)
+        backgr <- xyFromCell(sum_na_layer,backgr)
+        
+      }
+    } else if (any(method == "thickening")) {
+      
+      message(sprintf("Selecting %i background point based on %s method.", n, method  ))
+      
+      obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors))
+      
+      if (is.null(width_buffer)) {
+        width_buffer <- mean(terra::distance(obs_vec))
+      } else {
+        width_buffer <- as.numeric(width_buffer)
+      }
+      buf <- terra::buffer(obs_vec, width_buffer, quadsegs = 10)
+      buf_r <- !is.na(terra::rasterize(buf[1], sum_na_layer))
+      for (i in 2:nrow(buf)) {
+        buf_r <- buf_r + !is.na(terra::rasterize(buf[i], sum_na_layer))
+      }
+      buf_r <- terra::mask(buf_r, sum_na_layer)
+      backgr <- terra::spatSample(buf_r,
+                                  size = n,
+                                  method = "weighted", 
+                                  replace = FALSE,
+                                  na.rm = T,
+                                  xy = TRUE, 
+                                  as.points = FALSE, 
+                                  values = F)
+      
+    } else if (any(method == "biased")) {
+      
+      if (!is.null(mask)) density_bias <- fast_crop(density_bias, mask)
+      
+      backgr <- terra::spatSample(density_bias,
+                                  size = n,
+                                  method = "weighted", 
+                                  replace = FALSE,
+                                  na.rm = T,
+                                  xy = TRUE, 
+                                  as.points = FALSE, 
+                                  values = F)
+      
+    } else if (method == "inclusion_buffer") {
+      
+      obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors))
+      
+      if (is.null(dist_buffer)) {
+        
+        message("Buffer distance not provided. Using the 95% quantile 
+      of the minimum distance between each point.")
+        dist_buffer <- calculate_dist_buffer(obs |> dplyr::select(lon, lat))
+        message(sprintf("Buffer distance: %s (unit of projection)", dist_buffer))
+        
+      }
+      
+      # Creating the buffer
+      
+      buffer_shape <- rgeos::gBuffer(spgeom = as(obs_vec, "Spatial"),
+                                     byid = FALSE, width = dist_buffer)
+      
+      # crops the predictors to that shape to rasterize
+      sum_na_layer <- fast_crop(sum_na_layer,  buffer_shape)
+      
+      message(sprintf("Trying selecting %i background point based on %s method.", n ,method  ))
+      backgr <- terra::spatSample(sum_na_layer,
+                                  size = n, 
+                                  method="random",
+                                  replace=FALSE,
+                                  na.rm=T,
+                                  xy=TRUE,
+                                  as.points=FALSE, 
+                                  values=F)
+      
+    } else if (grepl("^weighted", method)) {
+      ## Set heatmap cells to NA if they are NA in sum_na_layer
+      ## Assumes the last layer of predictors is the heatmap, and that it is counts!
+      values(predictors[[dim(predictors)[3]]])[which(is.na(values(sum_na_layer)))] <- NA
+      tgb_weights <- predictors[[dim(predictors)[3]]]
+      tgb_weights <- tgb_weights/sum(values(tgb_weights), na.rm = T)
+      
+      #backgr <- sample(cells(tgb_weights), n, prob = unlist(extract(tgb_weights, cells(tgb_weights))))
+      #backgr <- xyFromCell(tgb_weights, backgr)
+
+      # Sampling using density as probabilities
+      # We still run into NA values in the weights, which we should exclude
+      weightsVector <- unlist(extract(tgb_weights, cells(tgb_weights)))
+      names(weightsVector) <- cells(tgb_weights)
+      weightsVector <- na.omit(weightsVector)
+
+      if(length(weightsVector) < n){
+        message(
+          "Number of background-points exceeds number of cell. ",
+          ncellr,
+          " background-points will be sampled"
+        )
+        backgr <- names(weightsVector)
+    
+      }else{
+        backgr <- sample(names(weightsVector), n, prob = weightsVector)
+
+      }
+
+      backgr <- xyFromCell(tgb_weights, as.numeric(backgr))
+    }
+  }
+  message(sprintf("%s selected", nrow(backgr)))
+  
+  #species <- unique(obs$scientific_name) This didn't work if there are more than one equivalent species names
+  species <- obs$scientific_name[1]
+  backgr <- dplyr::bind_cols(id = 1:nrow(backgr),
+                             scientific_name = species,
+                             backgr |> data.frame()) |>
+    setNames(c("id", "scientific_name", "lon", "lat"))
+  
+  return(backgr)
+}
+
+
+calculate_dist_buffer <- function(obs, n = 1000) {
+  #Uses the first 1000 points (randomly sampled) to create buffers and distances
+  if (nrow(obs) > n) {
+    nb_buffer_point <- n
+  } else {
+    nb_buffer_point <- nrow(obs) - 1
+  }
+  
+  sample_locations <- obs[sample(c(1:nrow(obs)),
+                                 size = nb_buffer_point, replace = FALSE), ]
+  #Uses the 95% quantile of the minimum distance between each point
+  distance <- raster::pointDistance(sample_locations, lonlat = FALSE)
+  mindist <- c()
+  for (q in 1:ncol(distance)) {
+    distance_zero <- distance[which(distance[, q] > 0), q]
+    mindist <- c(mindist, min(distance_zero))
+  }
+  dist_buffer <- 2 * stats::quantile(mindist, 0.95)
+  return(dist_buffer)
+}

From ea7ec609ee5ff181e2ba129f243b9a96ac2e5032 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Fri, 8 Sep 2023 15:41:53 -0400
Subject: [PATCH 02/10] check argument value to make sure it goes to a specific
 place

---
 scripts/SDM/selectBackgroundFunc.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R
index 875d3b65..5f560ff6 100644
--- a/scripts/SDM/selectBackgroundFunc.R
+++ b/scripts/SDM/selectBackgroundFunc.R
@@ -24,6 +24,8 @@ create_background <- function(
     width_buffer = NULL,
     raster = NULL) {
   
+  method <- match.arg(method)
+  
   proj <- terra::crs(predictors, proj = T)
   
   ## New method: If we use raster, we re-project our raster and add it as an additional layer

From 59c1dde06ff07cec4abb8a8c9ad4cd2d6ed3f57c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Wed, 13 Sep 2023 16:26:49 -0400
Subject: [PATCH 03/10] slightly better documentation for create_background

---
 scripts/SDM/selectBackgroundFunc.R | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R
index 5f560ff6..d2f2cdec 100644
--- a/scripts/SDM/selectBackgroundFunc.R
+++ b/scripts/SDM/selectBackgroundFunc.R
@@ -1,15 +1,21 @@
 
 
 #' @title Create background points
+#' 
+#' @description 
+#' Generates background points using any of the six available methods.
+#' 
+#' @details
+#' When `method = "random"`, background points are randomly sampled throughout the whole study extent. When `method = "weighted_raster"`, background points are sampled in proportion to the number of observations of a target group in an observation density raster. When `method = "unweighted_raster"`, background points are sampled only in cells where there are observations from a target group. With `method = "inclusion_buffer"`, background points are sampled within a buffer around observations (to be confirmed...). With `method = "thickening"`, background points are sampled in proportion the local density of observations by sampling in a buffer around each observation (to be confirmed...). Finally, when `method = "biased"`, a `density_bias` raster representing the effort is given and background points are sampled in proportion to this raster (to be confirmed...).  
 #'
 #' @name create_background
-#' @param predictors spat raster, containing the predictor variables
-#' @param mask, spat vector, mask to apply to the predictors.
-#' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted)
-#' @param n integer, number of background points to select.
-#' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer"
-#' @param width_buffer int, buffer width around observations. Used with method ==  "inclusion_buffer"
-#' @param species string, species name.
+#' @param predictors SpatRasterr, containing the predictor variables
+#' @param mask SpatVector, mask to apply to the predictors.
+#' @param method one of "random","weighted_raster","unweighted_raster","inclusion_buffer","biased","thickening".
+#' @param n integer, number of background points to sample.
+#' @param obs data.frame, containing the observations. Used with "thickening" or "inclusion_buffer".
+#' @param density_bias SpatRaster giving an effort/bias surface from which background points are sampled
+#' @param width_buffer int, buffer width around observations.  Used with "thickening" or "inclusion_buffer".
 #' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods
 #' @return spatial points
 #' @export
@@ -196,7 +202,7 @@ create_background <- function(
 
 
 calculate_dist_buffer <- function(obs, n = 1000) {
-  #Uses the first 1000 points (randomly sampled) to create buffers and distances
+  # uses the first 1000 points (randomly sampled) to create buffers and distances
   if (nrow(obs) > n) {
     nb_buffer_point <- n
   } else {

From bd027d2544fa2198b57c702b953b7a579f0f7fcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Fri, 29 Sep 2023 12:03:28 -0400
Subject: [PATCH 04/10] as.data.frame does not check names which avoids the
 conversion of - in . in col names coming from io and do not convert col names
 when reading data

---
 scripts/SDM/runMaxent.R     |  2 +-
 scripts/SDM/runMaxentFunc.R | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/scripts/SDM/runMaxent.R b/scripts/SDM/runMaxent.R
index aa24adbf..339ed95f 100644
--- a/scripts/SDM/runMaxent.R
+++ b/scripts/SDM/runMaxent.R
@@ -31,7 +31,7 @@ print("Inputs : ")
 print(input)
 
 
-presence_background <- read.table(file = input$presence_background, sep = '\t', header = TRUE) 
+presence_background <- read.table(file = input$presence_background, sep = '\t', header = TRUE, check.names = FALSE) 
 predictors <- terra::rast(unlist(input$predictors))
 mod_tuning <- run_maxent(presence_background, 
                          with_raster = F, # can be set to F to speed up
diff --git a/scripts/SDM/runMaxentFunc.R b/scripts/SDM/runMaxentFunc.R
index 5b35e5a9..1ce92af1 100644
--- a/scripts/SDM/runMaxentFunc.R
+++ b/scripts/SDM/runMaxentFunc.R
@@ -22,8 +22,8 @@ run_maxent <- function(presence.bg, with_raster = F,
                         parallelType = "doParallel"
 ) {
   
-  presence <- presence.bg |> dplyr::filter(pa == 1) |> data.frame()
-  background <- presence.bg |> dplyr::filter(pa == 0) |> data.frame()
+  presence <- presence.bg |> dplyr::filter(pa == 1) |> as.data.frame()
+  background <- presence.bg |> dplyr::filter(pa == 0) |> as.data.frame()
   
   if (with_raster) {
     ENMmodel <- ENMeval::ENMevaluate(occs = presence[, c("lon", "lat")],
@@ -259,8 +259,8 @@ predict_maxent <- function(presence_background,
 
   # We calculate the prediction with the whole dataset
 
-      presence <- presence_background |> dplyr::filter(pa == 1) |> data.frame()
-      background <- presence_background |> dplyr::filter(pa == 0) |> data.frame()
+      presence <- presence_background |> dplyr::filter(pa == 1) |> as.data.frame()
+      background <- presence_background |> dplyr::filter(pa == 0) |> as.data.frame()
       
 
       mod_tuning <- ENMeval::ENMevaluate(occs = presence[, c("lon", "lat", layers)], 
@@ -304,8 +304,8 @@ runs <- c("run_1")
       backg_test <- backgr[bg.grp == g, ]
       presence_bg_train <- group.all[which(group.all[,1] == g),]
       
-      presence <- presence_bg_train |> dplyr::filter(pa == 1) |> data.frame()
-      background <- presence_bg_train |> dplyr::filter(pa == 0) |> data.frame()
+      presence <- presence_bg_train |> dplyr::filter(pa == 1) |> as.data.frame()
+      background <- presence_bg_train |> dplyr::filter(pa == 0) |> as.data.frame()
       
 
       mod_tuning <- ENMeval::ENMevaluate(occs = presence[, c("lon", "lat", layers)], 

From a47fb13ef92f70db88078aca1ff0c7cfc03fcd20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Mon, 2 Oct 2023 16:50:06 -0400
Subject: [PATCH 05/10] some transition to terra (started because raster also
 converts - to . in variable names)

---
 scripts/SDM/runMaxent.R     | 10 +++++-----
 scripts/SDM/runMaxentFunc.R | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/scripts/SDM/runMaxent.R b/scripts/SDM/runMaxent.R
index 339ed95f..ef16bb20 100644
--- a/scripts/SDM/runMaxent.R
+++ b/scripts/SDM/runMaxent.R
@@ -52,7 +52,7 @@ mod_tuning <- run_maxent(presence_background,
 res_tuning <- mod_tuning@results
 tuned_param <- select_param(res_tuning, method = input$method_select_params, list = T)
 
-predictors <- raster::stack(predictors)
+#predictors <- raster::stack(predictors)
 
 sdms <- predict_maxent(presence_background,
   algorithm = "maxent.jar", 
@@ -73,17 +73,17 @@ names(sdm_pred) <- "prediction"
 sdm_runs <- sdms[["pred_runs"]]
 
 pred.output <- file.path(outputFolder, "sdm_pred.tif")
-runs.output <- paste0(outputFolder,"/sdm_runs_", 1:nlayers(sdm_runs), ".tif")
+runs.output <- paste0(outputFolder,"/sdm_runs_", 1:nlyr(sdm_runs), ".tif")
 #runs.output <- file.path(outputFolder, "sdm_runs.tif")
 
-sdm_pred<-project(rast(sdm_pred),crs(input$proj)) ##Temporary fix while maxent transitions to terra
+sdm_pred<-project(sdm_pred,crs(input$proj)) ##Temporary fix while maxent transitions to terra
 terra::writeRaster(x = sdm_pred,
                           filename = pred.output,
                           filetype = "COG",
                           wopt= list(gdal=c("COMPRESS=DEFLATE")),
                           overwrite = TRUE)
-for (i in 1:nlayers(sdm_runs)){
-    thisrun<-project(rast(sdm_runs[[i]]),crs(input$proj))  ##Temporary fix while maxent transitions to terra
+for (i in 1:nlyr(sdm_runs)){
+    thisrun<-project(sdm_runs[[i]],crs(input$proj))  ##Temporary fix while maxent transitions to terra
     terra::writeRaster(x = thisrun,
     filename = file.path(outputFolder, paste0("/sdm_runs_", i, ".tif")),
     filetype = "COG",
diff --git a/scripts/SDM/runMaxentFunc.R b/scripts/SDM/runMaxentFunc.R
index 1ce92af1..70a89b56 100644
--- a/scripts/SDM/runMaxentFunc.R
+++ b/scripts/SDM/runMaxentFunc.R
@@ -322,12 +322,18 @@ runs <- c("run_1")
       pred_pres <- dismo::predict(mod_tuning@models[[tuned_param]], predictors,
                                   args = sprintf("outputformat=%s", type))
 
-        if (inherits(pred_runs, "RasterLayer") || inherits(pred_runs, "RasterStack")) {
-              pred_runs <- raster::stack(pred_runs, pred_pres)
-         
-        } else {
-           pred_runs <-  pred_pres 
-           }
+        #if (inherits(pred_runs, "RasterLayer") || inherits(pred_runs, "RasterStack")) {
+        #    pred_runs <- raster::stack(pred_runs, pred_pres)
+        #} else {
+        #   pred_runs <-  pred_pres 
+        #}
+      if (is.null(pred_runs)) {
+        pred_runs <- pred_pres
+      } else {
+        pred_runs <-  c(pred_runs, pred_pres) 
+      }
+      
+        
       
  }
 

From 1596a529283d2fc65b2dae3fac7e161d3b4047a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Tue, 3 Oct 2023 14:57:24 -0400
Subject: [PATCH 06/10] changed some defaults for faster pipeline runs and more
 predictors

---
 pipelines/SDM/SDM_maxEnt.json | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json
index a9f2cec4..0fd939d9 100644
--- a/pipelines/SDM/SDM_maxEnt.json
+++ b/pipelines/SDM/SDM_maxEnt.json
@@ -366,7 +366,6 @@
       "data": {
         "type": "text[]",
         "value": [
-          "L",
           "LQ",
           "LQHP"
         ]
@@ -384,8 +383,7 @@
         "type": "float[]",
         "value": [
           0.5,
-          1,
-          2
+          1
         ]
       }
     },
@@ -852,7 +850,7 @@
         "checkerboard1",
         "checkerboard2"
       ],
-      "example": "block"
+      "example": "randomkfold"
     },
     "data>loadFromStac.yml@119|stac_url": {
       "description": "URL of the Stac catalog",
@@ -865,15 +863,19 @@
       "label": "collections_items",
       "type": "text[]",
       "example": [
-        "chelsa-clim|bio1",
-        "chelsa-clim|bio2"
+         "chelsa-clim|bio1",
+         "chelsa-clim|bio7",
+         "chelsa-clim|bio12",
+         "earthenv_topography|elevation_median",
+         "earthenv_landcover|class_3",
+         "earthenv_landcover|class_12"
       ]
     },
     "data>loadFromStac.yml@119|spatial_res": {
       "description": "Integer, spatial resolution of the rasters",
       "label": "spatial resolution",
       "type": "int",
-      "example": 1000
+      "example": 5000
     },
     "data>loadFromStac.yml@119|mask": {
       "description": "Shapefile, used to mask the output rasters",

From 50d366986c4e3b5a07fa44957782408996e64cb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Tue, 3 Oct 2023 16:25:57 -0400
Subject: [PATCH 07/10] default back to block

---
 pipelines/SDM/SDM_maxEnt.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json
index 0fd939d9..8e494161 100644
--- a/pipelines/SDM/SDM_maxEnt.json
+++ b/pipelines/SDM/SDM_maxEnt.json
@@ -850,7 +850,7 @@
         "checkerboard1",
         "checkerboard2"
       ],
-      "example": "randomkfold"
+      "example": "block"
     },
     "data>loadFromStac.yml@119|stac_url": {
       "description": "URL of the Stac catalog",

From 6c58e74400c1b1857c17cc1a5438120533a8cc5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Wed, 4 Oct 2023 11:39:52 -0400
Subject: [PATCH 08/10] replaced with a simplified version and added some
 options that were unavailable/not working

---
 scripts/SDM/selectBackgroundFunc.R | 237 +++++++++++------------------
 1 file changed, 87 insertions(+), 150 deletions(-)

diff --git a/scripts/SDM/selectBackgroundFunc.R b/scripts/SDM/selectBackgroundFunc.R
index f0dc9cf4..d9690571 100644
--- a/scripts/SDM/selectBackgroundFunc.R
+++ b/scripts/SDM/selectBackgroundFunc.R
@@ -8,6 +8,7 @@
 #' @param method string, random, inclusion_buffer (thickening or biased), or raster (unweighted or weighted)
 #' @param n integer, number of background points to select.
 #' @param obs data.frame, containing the observations. Used with method == "thickening" or "inclusion_buffer"
+#' @param density_bias spatRaster of densities for method biased (ignored in the pipeline at the moment)
 #' @param width_buffer int, buffer width around observations. Used with method ==  "inclusion_buffer"
 #' @param species string, species name.
 #' @param raster SpatRaster, raster heatmap used for weighted or unweighted sampling, default NULL when not using those methods
@@ -17,182 +18,118 @@
 create_background <- function(
     predictors,
     mask = NULL,
-    method = "random",
+    method = c("random","weighted_raster","unweighted_raster","inclusion_buffer","biased","thickening"),
     n = 10000,  
     obs = NULL,
     density_bias = NULL,
     width_buffer = NULL,
     raster = NULL) {
   
-  proj <- terra::crs(predictors, proj = T)
+  method <- match.arg(method)
+  
+  proj <- terra::crs(predictors)
   
   ## New method: If we use raster, we re-project our raster and add it as an additional layer
   if (grepl("raster", method)) {
     if (is.null(raster)) stop(paste("No raster included with method:", method))
-    
-    # Bilinear interpolation when projecting in this manner
-    # Exclude 0s, since we don't want to sample areas with no sightings
-    raster[raster <= 0] <- NA
-    
-    add(predictors) <- raster
-    
+  }
+  
+  if (method == "biased") {
+    if (is.null(density_bias)) stop(paste("No density_bias included with method:", method))
   }
   
   if (!is.null(mask)) {
     predictors <- fast_crop(predictors, mask)
   }
   
-  sum_na_layer <-  terra::tapp(predictors, index = rep(1, terra::nlyr(predictors)), fun = sum, na.rm = F)
-  
-  ncellr <- terra::global(!is.na(sum_na_layer), sum)
-  
-  if (ncellr < n) {
-    message(
-      "Number of background-points exceeds number of cell. ",
-      ncellr,
-      " background-points will be sampled"
-    )
-    backgr <- terra::as.data.frame(predictors, na.rm = TRUE, xy = TRUE) |> dplyr::select(x, y)
+  if (method %in% c("inclusion_buffer","thickening")){
+    ### Not sure why the following buffer width method was originally used. package FNN could be faster for searching nearest neighbours.
+    #message("Argument width_buffer not provided. Using the 95% quantile 
+    #  of the nearest neighbour distance that is not in the same location.")
+    #
+    #if(nrow(obs) > 1000){
+    #  sampled_obs <- sample(1:nrow(obs), min(1000, nrow(obs)))
+    #  message("Only 1000 randomly sampled locations are used to determine the width_buffer.")
+    #}else{
+    #  sampled_obs <- 1:nrow(obs)
+    #}
+    #width_buffer<-quantile(st_distance(obs[sampled_obs,]),0.95)
+    #  
+    #width_buffer<-obs[sampled_obs,] |>
+    #                st_distance() |> 
+    #                apply(2,function(x){min(x[x>0])}) |> 
+    #                quantile(0.95)
+    #  
+    #message(sprintf("width-buffer used is %s (in the units of the crs)", width_buffer))
     
-  } else {
+    if (is.null(obs)){
+      stop(paste("No obs included with method:", method))
+    }
     
-    if (any(method == "random" | grepl("^unweighted", method))) {
-      
-      # all the cells have the same probability to be selected
-      message(sprintf("Selecting %i background point based on %s method.", n, method  ))
-      
-      # Apparently this breaks if there are a lot of NA values.. using a different version for now
-      if (0) {
-        # This version has issues sampling rasters with a lot (or mostly) NA value cells
-        backgr <- terra::spatSample(sum_na_layer,
-                                    size = n,
-                                    method="random", 
-                                    replace = FALSE,
-                                    na.rm = T,
-                                    xy = TRUE, 
-                                    as.points = FALSE, 
-                                    values = F)
-      }else{
-        # cells only retrieves non-NA cells
-        backgr <- sample(cells(sum_na_layer), n)
-        backgr <- xyFromCell(sum_na_layer,backgr)
-        
-      }
-    } else if (any(method == "thickening")) {
-      
-      message(sprintf("Selecting %i background point based on %s method.", n, method  ))
-      
-      obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors))
-      
-      if (is.null(width_buffer)) {
-        width_buffer <- mean(terra::distance(obs_vec))
-      } else {
-        width_buffer <- as.numeric(width_buffer)
-      }
-      buf <- terra::buffer(obs_vec, width_buffer, quadsegs = 10)
-      buf_r <- !is.na(terra::rasterize(buf[1], sum_na_layer))
-      for (i in 2:nrow(buf)) {
-        buf_r <- buf_r + !is.na(terra::rasterize(buf[i], sum_na_layer))
-      }
-      buf_r <- terra::mask(buf_r, sum_na_layer)
-      backgr <- terra::spatSample(buf_r,
-                                  size = n,
-                                  method = "weighted", 
-                                  replace = FALSE,
-                                  na.rm = T,
-                                  xy = TRUE, 
-                                  as.points = FALSE, 
-                                  values = F)
-      
-    } else if (any(method == "biased")) {
-      
-      if (!is.null(mask)) density_bias <- fast_crop(density_bias, mask)
-      
-      backgr <- terra::spatSample(density_bias,
-                                  size = n,
-                                  method = "weighted", 
-                                  replace = FALSE,
-                                  na.rm = T,
-                                  xy = TRUE, 
-                                  as.points = FALSE, 
-                                  values = F)
-      
-    } else if (method == "inclusion_buffer") {
-      
-      obs_vec <- terra::vect(obs, geom = c("lon", "lat"), crs = crs(predictors))
-      
-      if (is.null(dist_buffer)) {
-        
-        message("Buffer distance not provided. Using the 95% quantile 
-      of the minimum distance between each point.")
-        dist_buffer <- calculate_dist_buffer(obs |> dplyr::select(lon, lat))
-        message(sprintf("Buffer distance: %s (unit of projection)", dist_buffer))
-        
-      }
-      
-      # Creating the buffer
-      
-      buffer_shape <- rgeos::gBuffer(spgeom = as(obs_vec, "Spatial"),
-                                     byid = FALSE, width = dist_buffer)
-      
-      # crops the predictors to that shape to rasterize
-      sum_na_layer <- fast_crop(sum_na_layer,  buffer_shape)
-      
-      message(sprintf("Trying selecting %i background point based on %s method.", n ,method  ))
-      backgr <- terra::spatSample(sum_na_layer,
-                                  size = n, 
-                                  method="random",
-                                  replace=FALSE,
-                                  na.rm=T,
-                                  xy=TRUE,
-                                  as.points=FALSE, 
-                                  values=F)
-      
-    } else if (grepl("^weighted", method)) {
-      ## Set heatmap cells to NA if they are NA in sum_na_layer
-      ## Assumes the last layer of predictors is the heatmap, and that it is counts!
-      values(predictors[[dim(predictors)[3]]])[which(is.na(values(sum_na_layer)))] <- NA
-      tgb_weights <- predictors[[dim(predictors)[3]]]
-      tgb_weights <- tgb_weights/sum(values(tgb_weights), na.rm = T)
-      
-      #backgr <- sample(cells(tgb_weights), n, prob = unlist(extract(tgb_weights, cells(tgb_weights))))
-      #backgr <- xyFromCell(tgb_weights, backgr)
-
-      # Sampling using density as probabilities
-      # We still run into NA values in the weights, which we should exclude
-      weightsVector <- unlist(extract(tgb_weights, cells(tgb_weights)))
-      names(weightsVector) <- cells(tgb_weights)
-      weightsVector <- na.omit(weightsVector)
-
-      if(length(weightsVector) < n){
-        message(
-          "Number of background-points exceeds number of cell. ",
-          ncellr,
-          " background-points will be sampled"
-        )
-        backgr <- names(weightsVector)
+    obs <- st_as_sf(obs, coords=c("lon", "lat"), crs = proj)
     
-      }else{
-        backgr <- sample(names(weightsVector), n, prob = weightsVector)
-
-      }
-
-      backgr <- xyFromCell(tgb_weights, as.numeric(backgr))
+    if(is.null(width_buffer)){
+      width_buffer <- 0.1 * max(diff(st_bbox(obs)[c(1, 3)]),diff(st_bbox(obs)[c(2, 4)]))
+      message(sprintf("width_buffer used is 10%% of the largest bounding box dimension (%s in the units of the crs)", round(width_buffer)))
     }
+    
   }
-  message(sprintf("%s selected", nrow(backgr)))
   
-  #species <- unique(obs$scientific_name) This didn't work if there are more than one equivalent species names
-  species <- obs$scientific_name[1]
-  backgr <- dplyr::bind_cols(id = 1:nrow(backgr),
-                             scientific_name = species,
-                             backgr |> data.frame()) |>
-    setNames(c("id", "scientific_name", "lon", "lat"))
+  backgr<-switch(method,
+                 random = {
+                   terra::spatSample(predictors[[1]], size = n, method = "random", replace = TRUE, values = FALSE, as.points = TRUE)
+                 },
+                 weighted_raster = {
+                   terra::spatSample(raster, size = n, method = "weights", replace = TRUE, values = FALSE, as.points = TRUE)
+                 },
+                 unweighted_raster = {
+                   terra::spatSample(raster > 0, size = n, method = "weights", replace = TRUE, values = FALSE, as.points = TRUE)
+                 },
+                 inclusion_buffer = { # this thing can take a while if there are many observations
+                   obs |> 
+                     st_buffer(dist = width_buffer, nQuadSegs = 10) |> 
+                     st_union() |> 
+                     st_sample(n) |> 
+                     st_as_sf()
+                 },
+                 biased = { # not sure how this method differs from weighted_raster, ignored at the moment as an option
+                   terra::spatSample(density_bias, size = n, method = "weights", replace = TRUE, values = FALSE, as.points = TRUE)
+                 },
+                 thickening = { 
+                   # There is probably a faster way using a radius and angle from each observations. Right now, rasterizing the buffers and sampling from the raster seems much faster than sampling within the buffers using sf (commented version)
+                   #backgr<-obs |> 
+                   #  st_buffer(dist = width_buffer, nQuadSegs = 10) |> 
+                   #  st_sample(size = rep(ceiling(n / nrow(obs)),nrow(obs)), by_polygon = FALSE) |> 
+                   #  st_as_sf()
+                   #backgr[sample(1:nrow(backgr),n),]
+                   terra::spatSample(
+                     rasterize(st_buffer(obs, dist = width_buffer, nQuadSegs = 10), rast(predictors[[1]]), fun = sum),
+                     size = n, 
+                     method = "weights", 
+                     replace = TRUE, 
+                     values = FALSE, 
+                     as.points = TRUE
+                   )
+                 }
+  )
+  backgr <- backgr[,-1] # temporary fix for https://github.com/rspatial/terra/issues/1275
+  backgr <- st_as_sf(backgr) |> 
+    st_coordinates() |>
+    data.frame(id = 1:nrow(backgr), scientific_name = obs$scientific_name[1], lon = _) |>
+    setNames(c("id","scientific_name","lon","lat"))
+  backgr
   
-  return(backgr)
 }
 
 
+
+
+
+
+
+
+
+
 calculate_dist_buffer <- function(obs, n = 1000) {
   #Uses the first 1000 points (randomly sampled) to create buffers and distances
   if (nrow(obs) > n) {

From f95c695a19ff636466f408a72d961eb2762fa07d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Rousseu?= <rouf1703@usherbrooke.ca>
Date: Wed, 4 Oct 2023 11:41:07 -0400
Subject: [PATCH 09/10] added background thickening as an option

---
 pipelines/SDM/SDM_maxEnt.json    | 3 ++-
 scripts/SDM/selectBackground.yml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json
index 8e494161..bbf2c6af 100644
--- a/pipelines/SDM/SDM_maxEnt.json
+++ b/pipelines/SDM/SDM_maxEnt.json
@@ -761,7 +761,8 @@
         "random",
         "inclusion_buffer",
         "weighted_raster",
-        "unweighted_raster"
+        "unweighted_raster",
+        "thickening"
       ],
       "example": "random"
     },
diff --git a/scripts/SDM/selectBackground.yml b/scripts/SDM/selectBackground.yml
index 1ebc31b5..9bd0e30c 100644
--- a/scripts/SDM/selectBackground.yml
+++ b/scripts/SDM/selectBackground.yml
@@ -26,6 +26,7 @@ inputs:
       - inclusion_buffer
       - weighted_raster
       - unweighted_raster
+      - thickening
     example: random
   n_background: 
     label: number of background points

From 113bad475b5a1b1c870d300705555f1b479feb88 Mon Sep 17 00:00:00 2001
From: frousseu <francois.rousseu@usherbrooke.ca>
Date: Wed, 4 Oct 2023 13:25:27 -0400
Subject: [PATCH 10/10] added some metada to pipeline

---
 pipelines/SDM/SDM_maxEnt.json | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/pipelines/SDM/SDM_maxEnt.json b/pipelines/SDM/SDM_maxEnt.json
index bbf2c6af..5323f76e 100644
--- a/pipelines/SDM/SDM_maxEnt.json
+++ b/pipelines/SDM/SDM_maxEnt.json
@@ -864,12 +864,12 @@
       "label": "collections_items",
       "type": "text[]",
       "example": [
-         "chelsa-clim|bio1",
-         "chelsa-clim|bio7",
-         "chelsa-clim|bio12",
-         "earthenv_topography|elevation_median",
-         "earthenv_landcover|class_3",
-         "earthenv_landcover|class_12"
+        "chelsa-clim|bio1",
+        "chelsa-clim|bio7",
+        "chelsa-clim|bio12",
+        "earthenv_topography|elevation_median",
+        "earthenv_landcover|class_3",
+        "earthenv_landcover|class_12"
       ]
     },
     "data>loadFromStac.yml@119|spatial_res": {
@@ -899,5 +899,17 @@
         1
       ]
     }
+  },
+  "metadata": {
+    "name": "Maxent SDM pipeline",
+    "description": "A pipeline to produce species distribution models using Maxent",
+    "author": [
+      {
+        "name": "Sarah Valentin",
+        "identifier": "https://orcid.org/0000-0002-9028-681X"
+      }
+    ],
+    "license": null,
+    "external_link": null
   }
 }
\ No newline at end of file