From 6f3ac4ae37fa477c2805fdc8431c8d5331cb19ff Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 18 Sep 2023 13:19:50 -0700 Subject: [PATCH 1/3] fetchSDA_spatial: adds `geom.src='mlrapolygon'` for querying MLRA boundaries by MLRARSYM - currently `mlrapolygon` is not a SSURGO table, but the official ZIP file can be queried directly via GDAL using MLRARSYM as the query attribute - in the future, if SDA ever provides MLRA geometry, the GDAL /vsizip/ method will be replaced with a T-SQL equivalent (and full support for other agg methods wil be added) --- R/fetchSDA_spatial.R | 36 ++++++++++++++++++++++++++++-------- man/fetchSDA_spatial.Rd | 18 +++++++++++++----- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/R/fetchSDA_spatial.R b/R/fetchSDA_spatial.R index 0f49f080..56c909dd 100644 --- a/R/fetchSDA_spatial.R +++ b/R/fetchSDA_spatial.R @@ -4,11 +4,11 @@ #' #' A Soil Data Access query returns geometry and key identifying information about the map unit or area of interest. Additional columns from the map unit or legend table can be included; see `add.fields` argument. #' -#' @param x A vector of map unit keys (`mukey`) or national map unit symbols (`nmusym`) for `mupolygon` geometry OR legend keys (`lkey`) or soil survey area symbols (`areasymbol`) for `sapolygon` geometry -#' @param by.col Column name containing map unit identifier `"mukey"`, `"nmusym"`/`"nationalmusym"` for `geom.src` `mupolygon` OR `"areasymbol"`, `"areaname"`, `"mlraoffice"`, `"mouagncyresp"` for `geom.src` `sapolygon`; default is determined by `is.numeric(x)` `TRUE` for `mukey` or `lkey` and `nationalmusym` or `areasymbol` otherwise. +#' @param x A vector of map unit keys (`mukey`) or national map unit symbols (`nmusym`) for `mupolygon` geometry OR legend keys (`lkey`) or soil survey area symbols (`areasymbol`) for `sapolygon` geometry. If `geom.src="mlrapolygon"` then `x` refers to `MLRARSYM` (major land resource area symbols). +#' @param by.col Column name containing map unit identifier `"mukey"`, `"nmusym"`/`"nationalmusym"` for `geom.src="mupolygon"` OR `"areasymbol"`, `"areaname"`, `"mlraoffice"`, `"mouagncyresp"` for `geom.src="sapolygon"`; default is determined by `is.numeric(x)` `TRUE` for `mukey` or `lkey` and `nationalmusym` or `areasymbol` otherwise. #' @param method geometry result type: `"feature"` returns polygons, `"bbox"` returns the bounding box of each polygon (via `STEnvelope()`), and `"point"` returns a single point (via `STPointOnSurface()`) within each polygon. -#' @param geom.src Either `mupolygon` (map unit polygons) or `sapolygon` (soil survey area boundary polygons) -#' @param db Default: `"SSURGO"`. When `geom.src` is `mupolygon`, use STATSGO polygon geometry instead of SSURGO by setting `db = "STATSGO"` +#' @param geom.src Either `mupolygon` (map unit polygons), `sapolygon` (soil survey area boundary polygons), or `mlrapolygon` (major land resource area boundary polygons) +#' @param db Default: `"SSURGO"`. When `geom.src` is `mupolygon`, use STATSGO polygon geometry instead of SSURGO by setting `db = "STATSGO"`. #' @param add.fields Column names from `mapunit` or `legend` table to add to result. Must specify parent table name as the prefix before column name e.g. `mapunit.muname`. #' @param chunk.size Number of values of `x` to process per query. Necessary for large results. Default: `10` #' @param verbose Print messages? @@ -21,10 +21,17 @@ #' Querying regions with complex mapping may require smaller `chunk.size`. Numerically adjacent IDs in the input vector may share common qualities (say, all from same soil survey area or region) which could cause specific chunks to perform "poorly" (slow or error) no matter what the chunk size is. Shuffling the order of the inputs using `sample()` may help to eliminate problems related to this, depending on how you obtained your set of MUKEY/nationalmusym to query. One could feasibly use `muacres` as a heuristic to adjust for total acreage within chunks. #' #' Note that STATSGO data are fetched where `CLIPAREASYMBOL = 'US'` to avoid duplicating state and national subsets of the geometry. -#' +#' +#' A prototype interface, `geom.src="mlrapolygon"`, is provided for obtaining Major Land Resource Area (MLRA) polygon +#' boundaries. When using this geometry source `x` is a vector of `MLRARSYM` (MLRA Symbols). The geometry source is +#' the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access.Instead of SDA, GDAL utilities +#' are used to read a zipped ESRI Shapefile from a remote URL: . +#' Therefore, most additional `fetchSDA_spatial()` arguments are _not_ currently supported for the MLRA geometry source. +#' In the future a `mlrapolygon` table may be added to SDA (analogous to `mupolygon` and `sapolygon`), +#' and the function will be updated accordingly at that time. +#' #' @author Andrew G. Brown, Dylan E. Beaudette #' @examplesIf curl::has_internet() -#' @examples #' \donttest{ #' #' # get spatial data for a single mukey @@ -60,12 +67,14 @@ fetchSDA_spatial <- function(x, chunk.size = 10, verbose = TRUE, as_Spatial = getOption('soilDB.return_Spatial', default = FALSE)) { + geom.src <- match.arg(tolower(geom.src), choices = c("mupolygon", "sapolygon", "mlrapolygon")) db <- match.arg(toupper(db), choices = c("SSURGO", "STATSGO")) # survey area polygons only available in SSURGO if (geom.src == 'sapolygon') { db <- 'SSURGO' } + # statsgo flag use_statsgo <- (db == "STATSGO") @@ -81,8 +90,11 @@ fetchSDA_spatial <- function(x, # be in different chunks x <- unique(x) - # lkey and areasymbol are the option for sapolygon - if (geom.src == 'sapolygon' && (by.col %in% c("mukey", "nmusym", "nationalmusym"))) { + if (geom.src == "mlrapolygon") { + # mlra polygons are not part of SSURGO or STATSGO + by.col <- "MLRARSYM" + } else if (geom.src == 'sapolygon' && (by.col %in% c("mukey", "nmusym", "nationalmusym"))) { + # lkey and areasymbol are the option for sapolygon if (is.numeric(x)) { by.col <- "lkey" } else { @@ -132,6 +144,14 @@ fetchSDA_spatial <- function(x, } mukey.list <- unique(res$lkey) + + } else if (by.col == "MLRARSYM") { + if (!requireNamespace("sf")) { + stop("package 'sf' is required to read MLRA boundaries from ZIP file source", call. = FALSE) + } + return(sf::read_sf("/vsizip//vsicurl/https://www.nrcs.usda.gov/sites/default/files/2022-10/MLRA_52_2022.zip/MLRA_52_2022", query = sprintf("SELECT * FROM MLRA_52 WHERE MLRARSYM IN %s", format_SQL_in_statement(x)), + as_tibble = FALSE, + stringsAsFactors = FALSE)) } else { return(try(stop(paste0("Unknown mapunit identifier (",by.col,")"), call. = FALSE))) } diff --git a/man/fetchSDA_spatial.Rd b/man/fetchSDA_spatial.Rd index a726b12d..8d7b8fbf 100644 --- a/man/fetchSDA_spatial.Rd +++ b/man/fetchSDA_spatial.Rd @@ -17,15 +17,15 @@ fetchSDA_spatial( ) } \arguments{ -\item{x}{A vector of map unit keys (\code{mukey}) or national map unit symbols (\code{nmusym}) for \code{mupolygon} geometry OR legend keys (\code{lkey}) or soil survey area symbols (\code{areasymbol}) for \code{sapolygon} geometry} +\item{x}{A vector of map unit keys (\code{mukey}) or national map unit symbols (\code{nmusym}) for \code{mupolygon} geometry OR legend keys (\code{lkey}) or soil survey area symbols (\code{areasymbol}) for \code{sapolygon} geometry. If \code{geom.src="mlrapolygon"} then \code{x} refers to \code{MLRARSYM} (major land resource area symbols).} -\item{by.col}{Column name containing map unit identifier \code{"mukey"}, \code{"nmusym"}/\code{"nationalmusym"} for \code{geom.src} \code{mupolygon} OR \code{"areasymbol"}, \code{"areaname"}, \code{"mlraoffice"}, \code{"mouagncyresp"} for \code{geom.src} \code{sapolygon}; default is determined by \code{is.numeric(x)} \code{TRUE} for \code{mukey} or \code{lkey} and \code{nationalmusym} or \code{areasymbol} otherwise.} +\item{by.col}{Column name containing map unit identifier \code{"mukey"}, \code{"nmusym"}/\code{"nationalmusym"} for \code{geom.src="mupolygon"} OR \code{"areasymbol"}, \code{"areaname"}, \code{"mlraoffice"}, \code{"mouagncyresp"} for \code{geom.src="sapolygon"}; default is determined by \code{is.numeric(x)} \code{TRUE} for \code{mukey} or \code{lkey} and \code{nationalmusym} or \code{areasymbol} otherwise.} \item{method}{geometry result type: \code{"feature"} returns polygons, \code{"bbox"} returns the bounding box of each polygon (via \code{STEnvelope()}), and \code{"point"} returns a single point (via \code{STPointOnSurface()}) within each polygon.} -\item{geom.src}{Either \code{mupolygon} (map unit polygons) or \code{sapolygon} (soil survey area boundary polygons)} +\item{geom.src}{Either \code{mupolygon} (map unit polygons), \code{sapolygon} (soil survey area boundary polygons), or \code{mlrapolygon} (major land resource area boundary polygons)} -\item{db}{Default: \code{"SSURGO"}. When \code{geom.src} is \code{mupolygon}, use STATSGO polygon geometry instead of SSURGO by setting \code{db = "STATSGO"}} +\item{db}{Default: \code{"SSURGO"}. When \code{geom.src} is \code{mupolygon}, use STATSGO polygon geometry instead of SSURGO by setting \code{db = "STATSGO"}.} \item{add.fields}{Column names from \code{mapunit} or \code{legend} table to add to result. Must specify parent table name as the prefix before column name e.g. \code{mapunit.muname}.} @@ -49,10 +49,17 @@ This function automatically "chunks" the input vector (using \code{makeChunks()} Querying regions with complex mapping may require smaller \code{chunk.size}. Numerically adjacent IDs in the input vector may share common qualities (say, all from same soil survey area or region) which could cause specific chunks to perform "poorly" (slow or error) no matter what the chunk size is. Shuffling the order of the inputs using \code{sample()} may help to eliminate problems related to this, depending on how you obtained your set of MUKEY/nationalmusym to query. One could feasibly use \code{muacres} as a heuristic to adjust for total acreage within chunks. Note that STATSGO data are fetched where \code{CLIPAREASYMBOL = 'US'} to avoid duplicating state and national subsets of the geometry. + +A prototype interface, \code{geom.src="mlrapolygon"}, is provided for obtaining Major Land Resource Area (MLRA) polygon +boundaries. When using this geometry source \code{x} is a vector of \code{MLRARSYM} (MLRA Symbols). The geometry source is +the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access.Instead of SDA, GDAL utilities +are used to read a zipped ESRI Shapefile from a remote URL: \url{https://www.nrcs.usda.gov/sites/default/files/2022-10/MLRA_52_2022.zip}. +Therefore, most additional \code{fetchSDA_spatial()} arguments are \emph{not} currently supported for the MLRA geometry source. +In the future a \code{mlrapolygon} table may be added to SDA (analogous to \code{mupolygon} and \code{sapolygon}), +and the function will be updated accordingly at that time. } \examples{ \dontshow{if (curl::has_internet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -\dontshow{\}) # examplesIf} \donttest{ # get spatial data for a single mukey @@ -77,6 +84,7 @@ Note that STATSGO data are fetched where \code{CLIPAREASYMBOL = 'US'} to avoid d # demo adding a field (`muname`) to attribute table of result head(try(fetchSDA_spatial(x = "2x8l5", by="nmusym", add.fields="muname"))) } +\dontshow{\}) # examplesIf} } \author{ Andrew G. Brown, Dylan E. Beaudette From d2e2da2d2f535a731cf27c377286a79e122f664e Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 18 Sep 2023 16:40:47 -0700 Subject: [PATCH 2/3] news --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index b7fe9f21..9420f2a4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# soilDB 2.7.9.9000 (2023-09-18) + - `fetchSDA_spatial()` gains `geom.src="mlrapolygon"` for obtaining Major Land Resource Area (MLRA) polygon boundaries. When using this geometry source `x` is a vector of `MLRARSYM` (MLRA Symbols). + + - The geometry source is the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access. Instead of SDA, GDAL utilities are used to read a zipped ESRI Shapefile from a remote URL: . Therefore, most additional `fetchSDA_spatial()` arguments are _not_ currently supported for the MLRA geometry source. In the future a `mlrapolygon` table may be added to SDA (analogous to `mupolygon` and `sapolygon`), and the function will be updated accordingly at that time. + + # soilDB 2.7.9 (2023-09-01) ## Bug Fixes From 6bdf6da80ed28c3aa905db7cca53866c8a6ea2ac Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 25 Sep 2023 16:44:47 -0700 Subject: [PATCH 3/3] standardize geometry column name for mlra output --- R/fetchSDA_spatial.R | 13 +++++++++---- man/fetchSDA_spatial.Rd | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/R/fetchSDA_spatial.R b/R/fetchSDA_spatial.R index 56c909dd..6bdbfb24 100644 --- a/R/fetchSDA_spatial.R +++ b/R/fetchSDA_spatial.R @@ -24,7 +24,7 @@ #' #' A prototype interface, `geom.src="mlrapolygon"`, is provided for obtaining Major Land Resource Area (MLRA) polygon #' boundaries. When using this geometry source `x` is a vector of `MLRARSYM` (MLRA Symbols). The geometry source is -#' the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access.Instead of SDA, GDAL utilities +#' the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access. Instead of SDA, GDAL utilities #' are used to read a zipped ESRI Shapefile from a remote URL: . #' Therefore, most additional `fetchSDA_spatial()` arguments are _not_ currently supported for the MLRA geometry source. #' In the future a `mlrapolygon` table may be added to SDA (analogous to `mupolygon` and `sapolygon`), @@ -149,9 +149,14 @@ fetchSDA_spatial <- function(x, if (!requireNamespace("sf")) { stop("package 'sf' is required to read MLRA boundaries from ZIP file source", call. = FALSE) } - return(sf::read_sf("/vsizip//vsicurl/https://www.nrcs.usda.gov/sites/default/files/2022-10/MLRA_52_2022.zip/MLRA_52_2022", query = sprintf("SELECT * FROM MLRA_52 WHERE MLRARSYM IN %s", format_SQL_in_statement(x)), + res <- sf::read_sf("/vsizip//vsicurl/https://www.nrcs.usda.gov/sites/default/files/2022-10/MLRA_52_2022.zip/MLRA_52_2022", query = sprintf("SELECT * FROM MLRA_52 WHERE MLRARSYM IN %s", format_SQL_in_statement(x)), as_tibble = FALSE, - stringsAsFactors = FALSE)) + stringsAsFactors = FALSE) + # use "geom" for consistency with other spatial outputs from SDA; requires sf >= 1.0-6 + sf::st_geometry(res) <- "geom" + # TODO: could provide custom MLRA aggregation methods here: centroid, bbox, convex hull? + # in the future a T-SQL implementation would allow for any of the defined method options + return(res) } else { return(try(stop(paste0("Unknown mapunit identifier (",by.col,")"), call. = FALSE))) } @@ -195,7 +200,7 @@ fetchSDA_spatial <- function(x, subchunk.res <- lapply(mukeys, function(xx) { sub.res <- .fetchSDA_spatial(mukeys, geom.type, geom.src, use_statsgo, add.fields, - verbose, paste0(i,"_",xx)) + verbose, paste0(i, "_", xx)) if (inherits(sub.res$result, 'try-error')) { # explicit handling for a hypothetical unqueryable single mukey diff --git a/man/fetchSDA_spatial.Rd b/man/fetchSDA_spatial.Rd index 8d7b8fbf..c58a1182 100644 --- a/man/fetchSDA_spatial.Rd +++ b/man/fetchSDA_spatial.Rd @@ -52,7 +52,7 @@ Note that STATSGO data are fetched where \code{CLIPAREASYMBOL = 'US'} to avoid d A prototype interface, \code{geom.src="mlrapolygon"}, is provided for obtaining Major Land Resource Area (MLRA) polygon boundaries. When using this geometry source \code{x} is a vector of \code{MLRARSYM} (MLRA Symbols). The geometry source is -the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access.Instead of SDA, GDAL utilities +the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access. Instead of SDA, GDAL utilities are used to read a zipped ESRI Shapefile from a remote URL: \url{https://www.nrcs.usda.gov/sites/default/files/2022-10/MLRA_52_2022.zip}. Therefore, most additional \code{fetchSDA_spatial()} arguments are \emph{not} currently supported for the MLRA geometry source. In the future a \code{mlrapolygon} table may be added to SDA (analogous to \code{mupolygon} and \code{sapolygon}),