diff --git a/NAMESPACE b/NAMESPACE index 5ed36f6..d54c4df 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(table_addColumn) export(table_addLocation) export(table_addSingleLocation) export(table_export) +export(table_findOverlappingLocations) export(table_getLocationID) export(table_getNearestDistance) export(table_getNearestLocation) diff --git a/R/table_findOverlappingLocations.R b/R/table_findOverlappingLocations.R new file mode 100644 index 0000000..f0561a1 --- /dev/null +++ b/R/table_findOverlappingLocations.R @@ -0,0 +1,127 @@ +#' +#' @title Finds overlapping locations in a known locations table. +#' +#' @description Calculates distances between all locations within a known +#' locations table and returns a tibble with the row indices and separation +#' distances of those records with overlapping locations. +#' +#' It is useful when working with new metadata tables to identify overlapping +#' locations early on so that decisions can be made about the apporpriateness +#' of the specified \code{radius}. +#' +#' @param tbl Tibble with \code{longitude} and \code{latitude} columns. +#' @param radius Radius in meters. +#' +#' @return Tibble of row indices and distances for those locations which overlap. +#' +#' @examples +#' library(MazamaLocationUtils) +#' +#' meta <- wa_airfire_meta +#' +#' # Anything locations closer than 2 km? (diameter = 2*radius) +#' table_findOverlappingLocations(meta, radius = 1000) +#' +#' # How about 4 km? +#' table_findOverlappingLocations(meta, radius = 2000) +#' +#' # Let's look at those locations +#' +#' tooCloseTbl <- table_findOverlappingLocations(meta, radius = 2000) +#' +#' for ( i in seq_len(nrow(tooCloseTbl)) ) { +#' rows <- as.numeric(tooCloseTbl[i, 1:2]) +#' cat(sprintf("\n%5.1f meters apart:\n", tooCloseTbl$distance[i])) +#' print(meta[rows, c('longitude', 'latitude', 'siteName')]) +#' } +#' +#' +#' @rdname table_findOverlappingLocations +#' @export +#' @importFrom MazamaCoreUtils stopIfNull +#' @importFrom dplyr tibble filter all_of +#' @importFrom rlang .data +#' +table_findOverlappingLocations <- function( + tbl = NULL, + radius = NULL +) { + + # ----- Validate parameters -------------------------------------------------- + + MazamaCoreUtils::stopIfNull(tbl) + MazamaCoreUtils::stopIfNull(radius) + + if ( !"data.frame" %in% class(tbl) ) + stop("Parameter 'tbl' is not of class \"data.frame\".") + + if ( !"longitude" %in% names(tbl) ) + stop("Parameter 'tbl' does not have a 'longitude' column.") + + if ( !"latitude" %in% names(tbl) ) + stop("Parameter 'tbl' does not have a 'latitude' column.") + + if ( !is.numeric(radius) ) + stop("Parameter 'radius' must be a numeric value.") + + diameter <- 2 * radius + + # ----- Check for locations that are too close ------------------------------- + + # Calculate distances between each location + distances <- geodist::geodist(tbl) + + # Get distances that are less than the given diameter + # NOTE: the distance between a location and itself is always zero + distancesLessThanR <- (distances != 0) & (distances < diameter ) + + # Select the locations that are "too close". + tooClose <- which(distancesLessThanR > 0, arr.ind = TRUE) + + if ( nrow(tooClose) == 0 ) { + + # Return an empty tibble + tooCloseTbl <- + dplyr::tibble( + row1 = 1, + row2 = 1, + distance = as.numeric(NA) + ) %>% dplyr::filter( + .data$row1 == -999 + ) + + } else { + # NOTE: If location a and b are too close, two entries will be returned: + # NOTE: row col + # NOTE: [#,] a b + # NOTE: ... + # NOTE: [#,] b a + # + # NOTE: While often the case, there is no guarantee that complementary + # NOTE: rows will be adjacent to eachother. The next couple of lines + # NOTE: find the rows that have the same indices and reduce the table to + # NOTE: only unique pairs. + + sortedMatrix <- t(apply(tooClose, 1, sort)) + tooClose <- sortedMatrix[!duplicated(sortedMatrix),] + + tooCloseTbl <- dplyr::tibble( + row1 = tooClose[,1], + row2 = tooClose[,2], + distance = as.numeric(NA) + ) + + for ( i in seq_len(nrow(tooClose)) ) { + tooCloseTbl$distance[i] <- + distances[tooCloseTbl$row1[i], tooCloseTbl$row2[i]] + } + + tooCloseTbl <- tooCloseTbl %>% dplyr::arrange(.data$distance) + + } + + # ----- Return --------------------------------------------------------------- + + return(tooCloseTbl) + +} diff --git a/R/table_initializeExisting.R b/R/table_initializeExisting.R index d3d219d..c4c8e01 100644 --- a/R/table_initializeExisting.R +++ b/R/table_initializeExisting.R @@ -42,11 +42,19 @@ #' codes, Default: 'NaturalEarthAdm1' #' @param countryCodes Vector of country codes used to optimize spatial #' searching. (See ?MazamaSpatialUtils::getStateCode()) -#' @param radius Maximum distance in meters between two locations that would -#' be considered "too close" +#' @param radius Radius in meters, Default: NULL #' @param verbose Logical controlling the generation of progress messages. #' -#' @return Known location tibble with the specified metadata columns. +#' @return Known location tibble with the specified metadata columns. Any +#' locations whose circles (as defined by \code{radius}) overlap will generate +#' warning messages. +#' +#' It is incumbent upon the user to address these issue by one of: +#' +#' \enumerate{ +#' \item{reduce the radius until no overlaps occur} +#' \item{assign one of the overlapping locations to the other location} +#' } #' #' @rdname table_initializeExisting #' @export @@ -67,6 +75,7 @@ table_initializeExisting <- function( # ----- Validate parameters -------------------------------------------------- MazamaCoreUtils::stopIfNull(tbl) + MazamaCoreUtils::stopIfNull(radius) if ( !"data.frame" %in% class(tbl) ) stop("Parameter 'tbl' is not of class \"data.frame\".") @@ -87,15 +96,17 @@ table_initializeExisting <- function( if ( "locationID" %in% names(tbl) ) stop("Parameter 'tbl' already has a column named \"locationID\"") - if ( !is.null(radius) && !is.numeric(radius) ) + if ( !is.numeric(radius) ) stop("Parameter 'radius' must be a numeric value.") + diameter <- 2 * radius + # ----- Create locationTbl --------------------------------------------------- tblColumns <- names(tbl) locationTbl <- tbl - + # * locationID ----- locationTbl$locationID <- location_createID( @@ -116,7 +127,7 @@ table_initializeExisting <- function( if ( verbose ) message("Searching for countryCodes...") - + locationTbl$countryCode <- MazamaSpatialUtils::getCountryCode( lon = locationTbl$longitude, lat = locationTbl$latitude, @@ -143,7 +154,7 @@ table_initializeExisting <- function( ) } - + # * locationName ----- if ( !"locationName" %in% tblColumns ) { @@ -240,57 +251,77 @@ table_initializeExisting <- function( # TODO: This doesn't seem to reorder like I thought it should. locationTbl <- dplyr::select(locationTbl, all_of(allColumns)) - # ----- Check for locations that are too close ------------------------------- - if ( !is.null(radius) ) { - - # Calculate distances between each location - distances <- geodist::geodist(locationTbl) - - # Get distances that are less than the given radius - # NOTE: the distance between a location and itself is always zero - distancesLessThanR <- (distances != 0) & (distances < radius) + # Calculate distances between each location + distances <- geodist::geodist(locationTbl) + + # Get distances that are less than the given diameter + # NOTE: the distance between a location and itself is always zero + distancesLessThanR <- (distances != 0) & (distances < diameter ) + + # Select the locations that are "too close". + tooClose <- which(distancesLessThanR > 0, arr.ind = TRUE) + + if ( nrow(tooClose) > 0 ) { - # Select the locations that are "too close". - tooClose <- which(distancesLessThanR > 0, arr.ind = TRUE) + # NOTE: If location a and b are too close, two entries will be returned: + # NOTE: row col + # NOTE: [#,] a b + # NOTE: ... + # NOTE: [#,] b a + # + # NOTE: While often the case, there is no guarantee that complementary + # NOTE: rows will be adjacent to eachother. The next couple of lines + # NOTE: find the rows that have the same indices and reduce the table to + # NOTE: only unique pairs. - # NOTE: If location a and b are too close, two entries will be returned: - # row col - # [1,] a b - # [2,] b a - # Hence, we select every other entry: - tooClose <- tooClose[seq(1, nrow(tooClose), 2), ] + sortedMatrix <- t(apply(tooClose, 1, sort)) + tooClose <- sortedMatrix[!duplicated(sortedMatrix),] tooCloseCount <- nrow(tooClose) # Format the first line of the warning message firstLine <- sprintf( - "%d locations have neighbors that are < %d m away", + "%d locations have neighbors that are < %d m apart\n", round(tooCloseCount), - radius + diameter ) - # Create a warning line for each location pair - lines <- c(firstLine) - for ( i in seq(nrow(tooClose)) ) { + # Create a vector of lines, on for each tooClose location pair + tooCloseLines <- vector("character", length = tooCloseCount) + for ( i in seq_len(nrow(tooClose)) ) { dist <- distances[tooClose[i, 1], tooClose[i, 2]] - newLine <- sprintf( - "Entries %s %s. Distance: %s m", + tooCloseLines[i] <- sprintf( + "Distance: %6.1f -- rows %s %s", + round(dist, 1), tooClose[i, 1], - tooClose[i, 2], - round(dist, 2) + tooClose[i, 2] ) - lines <- append(lines, newLine) - } + + instructions <- " +The presence of locations closer than twice the specified radius invalidate the +uniqueness of a 'known locations' table and should be rectified. There are two +basic options: - # Print the warning message - message(paste(lines, collapse = "\n")) - } + 1) Reduce the radius to less than the minimum distance. + 2) Manually merge nearby locations to share the same longitude, latitude and + locationID + +Please review the returned locationTbl for the identified rows. + " + + lines <- c(firstLine, tooCloseLines, instructions) + + # Paste the lines together + warning(paste(lines, collapse = "\n")) + + } + # ----- Return --------------------------------------------------------------- return(locationTbl) diff --git a/man/table_findOverlappingLocations.Rd b/man/table_findOverlappingLocations.Rd new file mode 100644 index 0000000..1733c78 --- /dev/null +++ b/man/table_findOverlappingLocations.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/table_findOverlappingLocations.R +\name{table_findOverlappingLocations} +\alias{table_findOverlappingLocations} +\title{Finds overlapping locations in a known locations table.} +\usage{ +table_findOverlappingLocations(tbl = NULL, radius = NULL) +} +\arguments{ +\item{tbl}{Tibble with \code{longitude} and \code{latitude} columns.} + +\item{radius}{Radius in meters.} +} +\value{ +Tibble of row indices and distances for those locations which overlap. +} +\description{ +Calculates distances between all locations within a known +locations table and returns a tibble with the row indices and separation +distances of those records with overlapping locations. + +It is useful when working with new metadata tables to identify overlapping +locations early on so that decisions can be made about the apporpriateness +of the specified \code{radius}. +} +\examples{ +library(MazamaLocationUtils) + +meta <- wa_airfire_meta + +# Anything locations closer than 2 km? (diameter = 2*radius) +table_findOverlappingLocations(meta, radius = 1000) + +# How about 4 km? +table_findOverlappingLocations(meta, radius = 2000) + +# Let's look at those locations + +tooCloseTbl <- table_findOverlappingLocations(meta, radius = 2000) + +for ( i in seq_len(nrow(tooCloseTbl)) ) { + rows <- as.numeric(tooCloseTbl[i, 1:2]) + cat(sprintf("\n\%5.1f meters apart:\n", tooCloseTbl$distance[i])) + print(meta[rows, c('longitude', 'latitude', 'siteName')]) +} + + +} diff --git a/man/table_initializeExisting.Rd b/man/table_initializeExisting.Rd index 1c95f52..6e7268b 100644 --- a/man/table_initializeExisting.Rd +++ b/man/table_initializeExisting.Rd @@ -22,13 +22,21 @@ codes, Default: 'NaturalEarthAdm1'} \item{countryCodes}{Vector of country codes used to optimize spatial searching. (See ?MazamaSpatialUtils::getStateCode())} -\item{radius}{Maximum distance in meters between two locations that would -be considered "too close"} +\item{radius}{Radius in meters, Default: NULL} \item{verbose}{Logical controlling the generation of progress messages.} } \value{ -Known location tibble with the specified metadata columns. +Known location tibble with the specified metadata columns. Any +locations whose circles (as defined by \code{radius}) overlap will generate +warning messages. + +It is incumbent upon the user to address these issue by one of: + +\enumerate{ +\item{reduce the radius until no overlaps occur} +\item{assign one of the overlapping locations to the other location} +} } \description{ An existing table may have much of the data that is needed