Skip to content

Commit

Permalink
implement projections in full
Browse files Browse the repository at this point in the history
  • Loading branch information
dramanica committed Dec 10, 2024
1 parent d83fe35 commit 71171ee
Show file tree
Hide file tree
Showing 14 changed files with 180 additions and 16 deletions.
4 changes: 4 additions & 0 deletions R/thin_by_cell.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#' Further thinning can be achieved by aggregating cells in the raster
#' before thinning, as achieved by setting `agg_fact` > 1 (aggregation works in a
#' manner equivalent to [terra::aggregate()]).
#' Note that if `data` is an `sf` object, the function will transform the coordinates
#' to the same projection as the `raster` (recommended); if `data` is a data.frame, it is up
#' to the user to ensure that the coordinates are in the correct units.
#'
#' @param data An [`sf::sf`] data frame, or a data frame with coordinate variables.
#' These can be defined in `coords`, unless they have standard names
Expand Down Expand Up @@ -36,6 +39,7 @@ thin_by_cell <- function(data, raster, coords = NULL, drop_na = TRUE, agg_fact =
# add type checks for these parameters
return_sf <- FALSE # flag whether we need to return an sf object
if (inherits(data, "sf")) {
data <- data %>% sf::st_transform(terra::crs(raster))
if (all(c("X", "Y") %in% names(data))) {
if (!all(data[, c("X", "Y")] %>% sf::st_drop_geometry() %>% as.matrix() == sf::st_coordinates(data)) |
any(is.na(data[, c("X", "Y")]))) {
Expand Down
7 changes: 5 additions & 2 deletions R/thin_by_cell_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#' Further spatial thinning can be achieved by aggregating cells in the raster
#' before thinning, as achieved by setting `agg_fact` > 1 (aggregation works in a
#' manner equivalent to [terra::aggregate()]).
#' Note that if `data` is an `sf` object, the function will transform the coordinates
#' to the same projection as the `raster` (recommended); if `data` is a data.frame, it is up
#' to the user to ensure that the coordinates are in the correct units.
#'
#' @param data An [`sf::sf`] data frame, or a data frame with coordinate variables.
#' These can be defined in `coords`, unless they have standard names
Expand Down Expand Up @@ -49,14 +52,14 @@ thin_by_cell_time <- function(data, raster, coords = NULL, time_col = "time",
if (inherits(raster, "SpatRasterDataset")) {
raster <- raster[[1]]
}

if(inherits(raster, "stars")) {
d <- stars::st_dimensions(raster)
time <- stars::st_get_dimension_values(raster, "time")
raster <- as(raster, "SpatRaster")
terra::time(raster, tstep = d$time$refsys) <- time
}

time_steps <- terra::time(raster)

if (any(is.na(time_steps))) {
Expand Down
17 changes: 16 additions & 1 deletion R/thin_by_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@
#' `c("longitude", "latitude")`, or `c("lon", "lat")`
#' @param dist_min Minimum distance between points (in units appropriate for
#' the projection, or meters for lonlat data).
#' @param dist_method method to compute distance, either "euclidean" or "great_circle".
#' Defaults to "great_circle", which is more accurate but takes slightly longer.
#' @returns An object of class [`sf::sf`] or [`data.frame`], the same as "data".
#' @export
#' @importFrom rlang :=

# This code is an adaptation of spThin to work on sf objects

thin_by_dist <- function(data, dist_min, coords = NULL) {
thin_by_dist <- function(data, dist_min, coords = NULL, dist_method = c("great_circle", "euclidean")) {
return_dataframe <-
FALSE # flag whether we need to return a data.frame
if (!inherits(data, "sf")) {
Expand All @@ -35,6 +37,14 @@ thin_by_dist <- function(data, dist_min, coords = NULL) {
return_dataframe <- TRUE
}

#use the proper method of distance calculation by changing projection if necessary
dist_method <- match.arg(dist_method)
if (dist_method == "great_circle") {
# store the original projection
original_crs <- sf::st_crs(data)
data <- sf::st_transform(data, 4326)
}

# compute distances with sf, using the appropriate units for the projection
dist_mat <- sf::st_distance(data)
units(dist_min) <- units(dist_mat)
Expand Down Expand Up @@ -85,6 +95,11 @@ thin_by_dist <- function(data, dist_min, coords = NULL) {

## Subset the original dataset
thinned_points <- data[points_to_keep, ]
# if we used great circle distances, we need to transform back
if (dist_method == "great_circle") {
thinned_points <- sf::st_transform(thinned_points, original_crs)
}

if (return_dataframe) {
thinned_points <- thinned_points %>%
dplyr::bind_cols(sf::st_coordinates(thinned_points)) %>% # re-add coordinates
Expand Down
18 changes: 17 additions & 1 deletion R/thin_by_dist_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@
#' @param dist_min Minimum distance between points (in units appropriate for
#' the projection, or meters for lonlat data).
#' @param interval_min Minimum time interval between points, in days.
#' @param dist_method method to compute distance, either "euclidean" or "great_circle".
#' Defaults to "great_circle", which is more accurate but takes slightly longer.
#' @returns An object of class [`sf::sf`] or [`data.frame`], the same as "data".
#' @export
#' @importFrom rlang :=

# This code is an adaptation of spThin to work on sf objects

thin_by_dist_time <- function(data, dist_min, interval_min, coords = NULL,
time_col = "time", lubridate_fun = c) {
time_col = "time", lubridate_fun = c, dist_method = c("great_circle", "euclidean")) {
return_dataframe <-
FALSE # flag whether we need to return a data.frame
# cast to sf if needed
Expand All @@ -44,6 +46,14 @@ thin_by_dist_time <- function(data, dist_min, interval_min, coords = NULL,
return_dataframe <- TRUE
}

#use the proper method of distance calculation by changing projection if necessary
dist_method <- match.arg(dist_method)
if (dist_method == "great_circle") {
# store the original projection
original_crs <- sf::st_crs(data)
data <- sf::st_transform(data, 4326)
}

# create a vector of times formatted as proper dates
time_lub <- data[, time_col] %>%
as.data.frame() %>%
Expand Down Expand Up @@ -108,6 +118,12 @@ thin_by_dist_time <- function(data, dist_min, interval_min, coords = NULL,

## Subset the original dataset
thinned_points <- data[points_to_keep, ]

# if we used great circle distances, we need to transform back
if (dist_method == "great_circle") {
thinned_points <- sf::st_transform(thinned_points, original_crs)
}

if (return_dataframe) {
thinned_points <- thinned_points %>%
dplyr::bind_cols(sf::st_coordinates(thinned_points)) %>% # re-add coordinates
Expand Down
File renamed without changes.
4 changes: 4 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
Albers
BCI
Babak
BlockCV
Breugel
CMD
CRS
DALEX
Ecography
Ecol
Expand Down Expand Up @@ -79,6 +81,7 @@ lacertidae
lat
lh
lon
longlat
lonlat
lqpht
lubridate
Expand All @@ -99,6 +102,7 @@ pearson
pred
prepping
prob
proj
pseudoabsence
pseudoabsences
quasiquotation
Expand Down
3 changes: 3 additions & 0 deletions man/thin_by_cell.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/thin_by_cell_time.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion man/thin_by_dist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/thin_by_dist_time.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions tests/testthat/test_thin_by_cell.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
test_that("thin_by_cell respects projections", {
# get the lacerta data and set crs to latlong
lacerta <- sf::st_as_sf(lacerta, coords = c("longitude", "latitude"))
sf::st_crs(lacerta) <- "+proj=longlat"
# get the raster (with crs latlong)
land_mask <- terra::readRDS(system.file("extdata/lacerta_land_mask.rds",
package = "tidysdm"
))
# and npw project it
iberia_proj4 <- "+proj=aea +lon_0=-4.0 +lat_1=36.8 +lat_2=42.6 +lat_0=39.7 +datum=WGS84 +units=m +no_defs"
land_mask <- terra::project(land_mask, y = iberia_proj4)
# thin the data with a mismatch in projections
set.seed(123)
lacerta_thin <- thin_by_cell(lacerta, land_mask)
# now project the points
lacerta_proj <- sf::st_transform(lacerta, iberia_proj4)
# and thin the data with matching projections
set.seed(123)
lacerta_thin_proj <- thin_by_cell(lacerta_proj, land_mask)
# check that the thinning is the same
expect_equal(lacerta_thin, lacerta_thin_proj)
# confirm that if we had used a data.frame with the wrong projection we would get a nonsense result
lacerta_df <- as.data.frame(lacerta) %>% dplyr::bind_cols(sf::st_coordinates(lacerta))
set.seed(123)
lacerta_thin_df <- thin_by_cell(lacerta_df, land_mask)
expect_false(nrow(lacerta_thin_df) == nrow(lacerta_thin))

})
24 changes: 24 additions & 0 deletions tests/testthat/test_thin_by_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,27 @@ test_that("thin_by_dist removes the correct points", {
# plot(grid_raster,colNA="darkgray")
# polys(terra::as.polygons(grid_raster))
# points(vect(locations), col="red", cex=2)


test_that("thin_by_dist respects the projection",{
# get the lacerta data and set crs to latlong
lacerta <- sf::st_as_sf(lacerta, coords = c("longitude", "latitude"))
sf::st_crs(lacerta) <- "+proj=longlat"
# and npw project it
iberia_proj4 <- "+proj=aea +lon_0=-4.0 +lat_1=36.8 +lat_2=42.6 +lat_0=39.7 +datum=WGS84 +units=m +no_defs"
lacerta_proj <- sf::st_transform(lacerta, iberia_proj4)
# thin the data with a mismatch in projections
set.seed(123)
lacerta_thin_gc <- thin_by_dist(lacerta_proj, dist_min = 20000) # great circle method
set.seed(123)
lacerta_thin_eu <- thin_by_dist(lacerta_proj, dist_min = 20000, dist_method = "euclidean") # euclidean method
# check that the thinning is not the same
expect_false(nrow(lacerta_thin_gc)==nrow(lacerta_thin_eu))
# check that the great circle method did not remove the crs from the data
expect_equal(sf::st_crs(lacerta_thin_gc), sf::st_crs(lacerta_proj))
# now thin the original dataset (in latlong)
set.seed(123)
lacerta_thin_gc_ll <- thin_by_dist(lacerta, dist_min = 20000)
# expect this to be identical to the great circle method
expect_equal(lacerta_thin_gc$ID, lacerta_thin_gc_ll$ID)
})
42 changes: 42 additions & 0 deletions tests/testthat/test_thin_by_dist_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,45 @@ test_that("thin_by_dist_time removes the correct points", {
# plot(grid_raster,colNA="darkgray")
# polys(terra::as.polygons(grid_raster))
# points(vect(locations), col="red", cex=2)


test_that("thin_by_dist_time respects the projection",{
# get the lacerta data and set crs to latlong
horses <- sf::st_as_sf(horses, coords = c("longitude", "latitude"))
sf::st_crs(horses) <- 4326
# and npw project it
iberia_proj4 <- "+proj=merc +lon_0=0 +lat_ts=10 +k=0.5 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
horses_proj <- sf::st_transform(horses, iberia_proj4)
# thin the data with a mismatch in projections
set.seed(123)
horses_thin_gc <- thin_by_dist_time(horses_proj,
dist_min = km2m(100),
interval_min = y2d(2000),
time_col = "time_bp",
lubridate_fun = pastclim::ybp2date
) # great circle method
set.seed(123)
horses_thin_eu <- thin_by_dist_time(horses_proj,
dist_min = km2m(100),
interval_min = y2d(2000),
time_col = "time_bp",
lubridate_fun = pastclim::ybp2date,
dist_method = "euclidean"
) # euclidean method
# check that the thinning is not the same
expect_false(nrow(horses_thin_gc)==nrow(horses_thin_eu))

# check that the great circle method did not remove the crs from the data
expect_equal(sf::st_crs(horses_thin_gc), sf::st_crs(horses_proj))
# now thin the original dataset (in latlong)
set.seed(123)
horses_thin_gc_ll <- thin_by_dist_time(horses,
dist_min = km2m(100),
interval_min = y2d(2000),
time_col = "time_bp",
lubridate_fun = pastclim::ybp2date
)
# expect this to be identical to the great circle method
expect_equal(horses_thin_gc$time_bp, horses_thin_gc_ll$time_bp)
})

Loading

0 comments on commit 71171ee

Please sign in to comment.