From dc2d953c3880ed77cae2730cb4cd9ee22b3e3ac5 Mon Sep 17 00:00:00 2001 From: Stephen Roecker Date: Thu, 28 Sep 2023 13:15:47 -0500 Subject: [PATCH 1/3] Update fetchNASIS_report.R fixing issues with NULL tables and empty report columns --- R/fetchNASIS_report.R | 233 +++++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 107 deletions(-) diff --git a/R/fetchNASIS_report.R b/R/fetchNASIS_report.R index 7c1341a4..23f46b20 100644 --- a/R/fetchNASIS_report.R +++ b/R/fetchNASIS_report.R @@ -4,8 +4,8 @@ nullFragsAreZero = TRUE, soilColorState = "moist", stringsAsFactors = NULL - ) { - +) { + if (!missing(stringsAsFactors) && is.logical(stringsAsFactors)) { .Deprecated(msg = sprintf("stringsAsFactors argument is deprecated.\nSetting package option with `NASISDomainsAsFactor(%s)`", stringsAsFactors)) NASISDomainsAsFactor(stringsAsFactors) @@ -14,71 +14,91 @@ tf <- "C:/ProgramData/USDA/NASIS/Temp/fetchNASIS.txt" if (!is.null(url)) tf <- url - + # sanity check .checks$soilColorState(soilColorState) - + # check if temp file exists if (!file.exists(tf) & is.null(url)) { stop("the temp file ", tf, "\n doesn't exist, please run the fetchNASIS report from NASIS") } - + temp <- readLines(tf) - - be <- data.frame(table = c("site", "pediagfeatures", "phorizon", "phcolor"), - begin = grep("@begin", temp), - end = grep("@end", temp)) - + + be <- data.frame( + table = c("site", "pediagfeatures", "phorizon", "phcolor"), + begin = grep("@begin", temp), + end = grep("@end", temp) + ) + # check to see if there is any data - diff.idx <- be$end - be$begin - - if(all(diff.idx == 1)) + be$diff <- be$end - be$begin + + if(all(be$diff == 1)) stop("empty result set -- check parameters used to run `fetchNASIS` export report.", call.=FALSE) - + split(be, be$table) ->.; lapply(., function(x) { + if (x$diff > 1) { x2 <- temp[seq(x$begin + 1, x$end - 1)] + # remove "\" from lines and blank lines e.g. || + x2 <- gsub('"|\\|*$', "", x2) x2 <- read.csv(textConnection(x2), sep = "|", quote = "", stringsAsFactors = FALSE) # aggregate NASIS returns empty rows # NASIS text reports return empty columns # remove - x2 <- x2[!is.na(x2$peiid), -ncol(x2)] + # x2 <- x2[, -ncol(x2)] idx <- names(x2) %in% c("pmkind", "pmorigin") - x2[!idx] <- uncode(x2[!idx]) + x2[!idx] <- soilDB::uncode(x2[!idx]) idx <- sapply(x2, is.character) x2[idx] <- lapply(x2[idx], function(x) ifelse(x == "", NA, x)) return(x2) + } else return(NULL) }) -> .; names(.) <- c("pediagfeatures", "phcolor", "phorizon", "site") - - + + # simplify colors - .$phcolor <- .color(.$phcolor, soilColorState = soilColorState) - - + if (!is.null(.$phcolor)) { + .$phcolor <- .color(.$phcolor, soilColorState = soilColorState) + } + + # fix problems - .$phorizon <- .fix_problems(hz_data = .$phorizon, - site_data = .$site, - pedon_id = "peiid", - hzdept = "hzdept", - hzdepb = "hzdepb", - rmHzErrors = rmHzErrors, - nullFragsAreZero = nullFragsAreZero, - soilColorState = soilColorState - ) - - - # upgrade to SoilProfilecollection - h <- merge(.$phorizon, .$phcolor, by = "phiid", all.x = TRUE, sort = FALSE) - depths(h) <- peiid ~ hzdept + hzdepb - - - # left-join via peiid - # < 0.1 second for ~ 4k pedons .$site = .fix_site_problems(.$site, nullFragsAreZero = nullFragsAreZero) - site(h) <- .$site - - + + + if (!is.null(.$phorizon)) { + .$phorizon <- .fix_problems(hz_data = .$phorizon, + site_data = .$site, + pedon_id = "peiid", + hzdept = "hzdept", + hzdepb = "hzdepb", + rmHzErrors = rmHzErrors, + nullFragsAreZero = nullFragsAreZero, + soilColorState = soilColorState + ) + + + # upgrade to SoilProfilecollection + h <- .$phorizon + depths(h) <- peiid ~ hzdept + hzdepb + + # set optional hz designation and texture slots + hzdesgnname(h) <- "hzname" + hztexclname(h) <- "texture" + + site(h) <- .$site + + if (!is.null(.$phcolor)) horizons(h) <- .$phcolor + + } else { + h <- aqp::SoilProfileCollection( + site = .$site + ) + } + + # tidy .$pediagfeatures pediagfeatures <- .$pediagfeatures pediagfeatures[-1] <- lapply(pediagfeatures[-1], function(x) { @@ -86,31 +106,34 @@ }) # pediagfeatures[!is.na(.$pediagfeatures)] <- TRUE site(h) <- pediagfeatures - - vars <- names(.$pediagfeatures)[-1] - pediagfeatures <- stats::reshape(.$pediagfeatures, - direction = "long", - timevar = "featkind", times = vars, - v.names = "featdep", varying = vars - ) - pediagfeatures <- pediagfeatures[! is.na(pediagfeatures$featdep), ] - featdep <- { - x <- strsplit(pediagfeatures$featdep, "-") - featdept <- unlist(lapply(x, function(x) as.integer(x[1]))) - suppressWarnings(featdepb <- unlist(lapply(x, function(x) as.integer(x[2])))) - data.frame(featdept, featdepb) + + if (!is.null(.$pediagfeatures)) { + vars <- names(.$pediagfeatures)[-1] + pediagfeatures <- stats::reshape(.$pediagfeatures, + direction = "long", + timevar = "featkind", times = vars, + v.names = "featdep", varying = vars + ) + pediagfeatures <- pediagfeatures[! is.na(pediagfeatures$featdep), ] + featdep <- { + x <- strsplit(pediagfeatures$featdep, "-") + featdept <- unlist(lapply(x, function(x) as.integer(x[1]))) + suppressWarnings(featdepb <- unlist(lapply(x, function(x) as.integer(x[2])))) + data.frame(featdept, featdepb) + } + pediagfeatures <- cbind(pediagfeatures[c("peiid", "featkind")], featdep) + pediagfeatures$featkind <- gsub("\\.", " ", pediagfeatures$featkind) + + diagnostic_hz(h) <- pediagfeatures } - pediagfeatures <- cbind(pediagfeatures[c("peiid", "featkind")], featdep) - pediagfeatures$featkind <- gsub("\\.", " ", pediagfeatures$featkind) - - diagnostic_hz(h) <- pediagfeatures - - + + + # set metadata m <- metadata(h) m$origin <- "NASIS pedons (export)" metadata(h) <- m - + # set NASIS-specific horizon identifier tryCatch(hzidname(h) <- 'phiid', error = function(e) { if(grepl(e$message, pattern="not unique$")) { @@ -122,15 +145,12 @@ } } }) - - # set optional hz designation and texture slots - hzdesgnname(h) <- "hzname" - hztexclname(h) <- "texture" - + + # done return(h) - - } + +} # temp <- .fetchNASISTemp() @@ -141,16 +161,16 @@ .Deprecated(msg = sprintf("stringsAsFactors argument is deprecated.\nSetting package option with `NASISDomainsAsFactor(%s)`", stringsAsFactors)) NASISDomainsAsFactor(stringsAsFactors) } - + tf = "C:/ProgramData/USDA/NASIS/Temp/get_site_from_NASIS.txt" if (!is.null(url)) tf = url - + # check if temp file exists if (!file.exists(tf) & is.null(url)) { stop("the temp file ", tf, "\n doesn't exist, please run the fetchNASIS report from NASIS") } - - + + # check to see if data is coming from fetchNASIS or get_site temp <- readLines(tf) begin = grep("@begin get_site_from_NASIS", temp) @@ -180,14 +200,14 @@ # obsdate = as.Date(as.character(obsdate)) # classdate = as.Date(as.character(classdate)) # }) - + temp = .fix_site_problems(temp, nullFragsAreZero = nullFragsAreZero) - + # impute missing x_std & y_std if utm are present # idx <- with(temp, ! complete.cases(x_std, y_std) & complete.cases(utmzone, utmeasting, utmnorthing, horizdatnm)) return(temp) - - } + +} @@ -199,12 +219,12 @@ } tf <- "C:/ProgramData/USDA/NASIS/Temp/get_pediagfeatures_from_NASIS.txt" - + # check if temp file exists if (!file.exists(tf)) { stop("the temp file ", tf, "\n doesn't exist, please run the get_pediagfeatures_from_NASIS report from NASIS") } - + temp = read.csv( textConnection( readLines(tf) @@ -221,7 +241,7 @@ # obsdate = as.Date(as.character(obsdate)) # classdate = as.Date(as.character(classdate)) # }) - + } @@ -229,7 +249,7 @@ ## fix some common problems .checks <- list( - + soilColorState = function(soilColorState) { if(! soilColorState %in% c('dry', 'moist')) stop('soilColorState must be either `dry` or `moist`', @@ -241,16 +261,16 @@ # color .color <- function(df, soilColorState = "moist") { - + # convert back to characters / numeric df$colormoistst = as.character(df$colormoistst) df$colorhue = as.character(df$colorhue) - + # careful! # uncode creates factors, so we have to convert to character first df$colorvalue = as.numeric(as.character(df$colorvalue)) df$colorchroma = as.numeric(as.character(df$colorchroma)) - + # sanity check, only attempt to simplify colors if there are > 1 rows if (nrow(df) > 1) { # mix colors as-needed, mixing done in CIE LAB space @@ -260,7 +280,7 @@ # do nothing df } - + ## copy pre-computed colors into a convenience field for plotting # moist colors if (soilColorState == "moist") @@ -268,7 +288,7 @@ # dry colors if (soilColorState == "dry") df$soil_color <- df$dry_soil_color - + return(df) } @@ -283,27 +303,27 @@ nullFragsAreZero = TRUE, soilColorState = "moist" ) { - + # 1 - replace missing lower boundaries missing.lower.depth.idx <- which(!is.na(hz_data[[hzdept]]) & is.na(hz_data[[hzdepb]])) - + # keep track of affected pedon IDs (if none, this will have zero length) assign("missing.bottom.depths", value = unique(hz_data[[pedon_id]][missing.lower.depth.idx]), envir = get_soilDB_env() ) - + if (length(missing.lower.depth.idx) > 0) { message(paste('replacing missing lower horizon depths with top depth + 1cm ... [', length(missing.lower.depth.idx), ' horizons]', sep='')) - + # make edit hz_data[[hzdepb]][missing.lower.depth.idx] <- hz_data[[hzdept]][missing.lower.depth.idx] + 1 } - - + + # 2 - top == bottom ? bottom <- bottom + 1 top.eq.bottom.idx <- which(hz_data[[hzdept]] == hz_data[[hzdepb]]) - + # keep track of affected pedon IDs (if none, this will have zero length) assign('top.bottom.equal', value = unique(hz_data[[pedon_id]][top.eq.bottom.idx]), @@ -315,20 +335,20 @@ ) hz_data[[hzdepb]][top.eq.bottom.idx] <- hz_data[[hzdepb]][top.eq.bottom.idx] + 1 } - + h.test <- aqp::checkHzDepthLogic(hz_data, c('hzdept', 'hzdepb'), idname = pedon_id, fast = TRUE) - + # which are the good (valid) ones? good.ids <- as.character(h.test[[pedon_id]][which(h.test$valid)]) bad.ids <- as.character(h.test[[pedon_id]][which(!h.test$valid)]) bad.horizons <- hz_data[which(!h.test$valid), c(1:4,6,7)] bad.pedon.ids <- site_data[[pedon_id]][which(site_data[[pedon_id]] %in% bad.ids)] - + # optionally filter pedons WITH NO horizonation inconsistencies if (rmHzErrors) { hz_data <- hz_data[which(hz_data[[pedon_id]] %in% good.ids), ] } - + # keep track of those pedons with horizonation errors assign('bad.pedon.ids', value = bad.pedon.ids, @@ -338,33 +358,33 @@ value = data.frame(bad.horizons), envir = get_soilDB_env() ) - + #4 - optionally convert NA fragvol to 0 if (nullFragsAreZero) { - + idx <- grep("fragvol|frags_|gravel|cobbles|stones|boulders|channers|unspecified", names(hz_data)) vars <- names(hz_data)[idx] hz_data[idx] <-lapply(hz_data[idx], function(x) { ifelse(is.na(x), 0, x) }) } - + return(hz_data) - + } .fix_site_problems <- function(site_data, nullFragsAreZero = nullFragsAreZero) { - + if (nullFragsAreZero == TRUE) { idx <- grep("fragvol|frags_|gravel|cobbles|stones|boulders|channers|unspecified", names(site_data)) vars <- names(site_data)[idx] site_data[idx] <-lapply(site_data[idx], function(x) { ifelse(is.na(x), 0, x) - }) + }) } else site_data - + return(site_data) } @@ -382,7 +402,7 @@ # } if (any(p_idx)) { stop("some of the argument parameters are NULL") - } + } # # local report @@ -402,5 +422,4 @@ } return(df) -} - +} \ No newline at end of file From fb9944f607d7a6ee3380f600d89f174c4620d968 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Sun, 1 Oct 2023 06:05:56 -0700 Subject: [PATCH 2/3] bump tests for FY24 SSURGO refresh --- tests/testthat/test-SDA_query.R | 2 +- tests/testthat/test-get_SDA_hydric.R | 2 +- tests/testthat/test-get_SDA_interpretation.R | 4 ++-- tests/testthat/test-get_SDA_property.R | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-SDA_query.R b/tests/testthat/test-SDA_query.R index 947f2677..1d282b4a 100644 --- a/tests/testthat/test-SDA_query.R +++ b/tests/testthat/test-SDA_query.R @@ -224,7 +224,7 @@ test_that("SDA_query() works with multi-line records", { # https://github.com/ncss-tech/soilDB/issues/28 expect_true(inherits(x.4, 'data.frame')) - expect_true(nrow(x.4) == 6) + expect_true(nrow(x.4) == 7) }) diff --git a/tests/testthat/test-get_SDA_hydric.R b/tests/testthat/test-get_SDA_hydric.R index d5dad50b..a4c34488 100644 --- a/tests/testthat/test-get_SDA_hydric.R +++ b/tests/testthat/test-get_SDA_hydric.R @@ -11,7 +11,7 @@ test_that("get_SDA_hydric works", { # check classification of mapunits x.nonhydric <- subset(x, x$HYDRIC_RATING == "Nonhydric") - expect_equal(nrow(x.nonhydric), 175) + expect_equal(nrow(x.nonhydric), 174) expect_true(all(x.nonhydric$hydric_majors == 0 & x.nonhydric$hydric_inclusions == 0)) # by mukey diff --git a/tests/testthat/test-get_SDA_interpretation.R b/tests/testthat/test-get_SDA_interpretation.R index 715e5219..abfa3ce4 100644 --- a/tests/testthat/test-get_SDA_interpretation.R +++ b/tests/testthat/test-get_SDA_interpretation.R @@ -1,6 +1,6 @@ target_areas <- c("CA649", "CA630") -target_area_rows <- 223 -target_area_rows_all <- 1031 +target_area_rows <- 221 +target_area_rows_all <- 1021 target_mukeys <- c(463263, 463264) diff --git a/tests/testthat/test-get_SDA_property.R b/tests/testthat/test-get_SDA_property.R index 194e1049..9f8c5b76 100644 --- a/tests/testthat/test-get_SDA_property.R +++ b/tests/testthat/test-get_SDA_property.R @@ -1,11 +1,11 @@ target_areas <- c("CA649", "CA630") -target_area_rows <- 223 # 1:1 with mukey -target_area_rows_all <- 1031 # 1:1 with component -target_area_rows_all_chorizon <- 3297 # 1:1 with chorizon +target_area_rows <- 221 # 1:1 with mukey +target_area_rows_all <- 1021 # 1:1 with component +target_area_rows_all_chorizon <- 3356 # 1:1 with chorizon n_misc_area_rows <- 9 -n_misc_area_rows_all <- 288 +n_misc_area_rows_all <- 285 n_misc_area_rows_all_chorizon <- 19 target_mukeys <- c(463263, 463264) From 89cd2459b693786b192ce3539d61a9104d424f4e Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Mon, 2 Oct 2023 12:18:26 -0700 Subject: [PATCH 3/3] fetchSDA_spatial: add new T-SQL aggregate geometry methods (#299) * fetchSDA_spatial: add new aggregation methods "extent", "convexhull" and "union" - using `geometry::EnvelopeAggregate()` and friends * Add `method="collection"` * Update NEWS.md * Use LOB types for STRING_AGG - one liner is easy to incorporate into query but probably not the most efficient; consider refactor * Generalize additional fields via `add.fields` within aggregation methods * test --- NEWS.md | 8 +++- R/fetchSDA_spatial.R | 59 ++++++++++++++++++-------- man/fetchSDA_spatial.Rd | 2 +- tests/testthat/test-fetchSDA_spatial.R | 10 ++++- 4 files changed, 57 insertions(+), 22 deletions(-) diff --git a/NEWS.md b/NEWS.md index b7fe9f21..63798282 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # soilDB 2.7.9 (2023-09-01) - + + - Added new `method` options for `fetchSDA_spatial()`. Aggregation grouping is controlled by the `by.col` argument. This works for mapunit and survey area polygon geometries, aggregating all polygons in the group for each `mukey`, `nationalmusym`, `lkey`, or `areasymbol` extent. + - `method="extent"` method calculates a bounding rectangle + - `method="convexhull"` calculates the convex hull + - `method="union"` returns a MULTIPOLYGON + - `method="collection"` returns a GEOMETRYCOLLECTION + ## Bug Fixes - Bug fix for `get_vegplot_transpoints_from_NASIS_db()`; using wrong record ID for transect points diff --git a/R/fetchSDA_spatial.R b/R/fetchSDA_spatial.R index 0f49f080..8a02a762 100644 --- a/R/fetchSDA_spatial.R +++ b/R/fetchSDA_spatial.R @@ -6,7 +6,7 @@ #' #' @param x A vector of map unit keys (`mukey`) or national map unit symbols (`nmusym`) for `mupolygon` geometry OR legend keys (`lkey`) or soil survey area symbols (`areasymbol`) for `sapolygon` geometry #' @param by.col Column name containing map unit identifier `"mukey"`, `"nmusym"`/`"nationalmusym"` for `geom.src` `mupolygon` OR `"areasymbol"`, `"areaname"`, `"mlraoffice"`, `"mouagncyresp"` for `geom.src` `sapolygon`; default is determined by `is.numeric(x)` `TRUE` for `mukey` or `lkey` and `nationalmusym` or `areasymbol` otherwise. -#' @param method geometry result type: `"feature"` returns polygons, `"bbox"` returns the bounding box of each polygon (via `STEnvelope()`), and `"point"` returns a single point (via `STPointOnSurface()`) within each polygon. +#' @param method geometry result type: `"feature"` returns polygons, `"bbox"` returns the bounding box of each polygon (via `STEnvelope()`), `"point"` returns a single point (via `STPointOnSurface()`) within each polygon, `"extent"` returns an aggregate bounding box (the extent of all polygons, `geometry::EnvelopeAggregate()`) ), `"convexhull"` (`geometry::ConvexHullAggregate()`) returns the aggregate convex hull around all polygons, `"union"` (`geometry::UnionAggregate()`) and `"collection"` (`geometry::CollectionAggregate()`) return a `MULTIPOLYGON` or a `GEOMETRYCOLLECTION`, respectively, for each `mukey`, `nationalmusym`, or `areasymbol `. In the case of the latter four aggregation methods, the groups for aggregation depend on `by.col` (default by `"mukey"`). #' @param geom.src Either `mupolygon` (map unit polygons) or `sapolygon` (soil survey area boundary polygons) #' @param db Default: `"SSURGO"`. When `geom.src` is `mupolygon`, use STATSGO polygon geometry instead of SSURGO by setting `db = "STATSGO"` #' @param add.fields Column names from `mapunit` or `legend` table to add to result. Must specify parent table name as the prefix before column name e.g. `mapunit.muname`. @@ -71,10 +71,7 @@ fetchSDA_spatial <- function(x, tstart <- Sys.time() - # sanity check: method must be one of: - if (!method %in% c('feature', 'bbox', 'point')) { - stop('method must be one of: `feature`, `bbox`, or `point`.', call. = FALSE) - } + method <- match.arg(tolower(method), c('feature', 'bbox', 'extent', 'envelope', 'point', 'convexhull', 'union', 'collection')) # remove any redundancy in input off the top -- this is important # in case x is not ordered and contains duplicates which will possibly @@ -133,17 +130,26 @@ fetchSDA_spatial <- function(x, mukey.list <- unique(res$lkey) } else { - return(try(stop(paste0("Unknown mapunit identifier (",by.col,")"), call. = FALSE))) + return(try(stop(paste0("Unknown mapunit identifier (", by.col, ")"), call. = FALSE))) } mukey.chunk <- makeChunks(mukey.list, chunk.size) s <- NULL + # alias + if (method == "envelope"){ + method <- "extent" + } + # select method geom.type <- switch(method, feature = 'mupolygongeo.STAsText()', bbox = 'mupolygongeo.STEnvelope().STAsText()', + collection = 'geometry::CollectionAggregate(mupolygongeo).STAsText()', + extent = 'geometry::EnvelopeAggregate(mupolygongeo).STAsText()', + convexhull = 'geometry::ConvexHullAggregate(mupolygongeo).STAsText()', + union = 'geometry::UnionAggregate(mupolygongeo).STAsText()', point = 'mupolygongeo.STPointOnSurface().STAsText()') if (geom.src == 'sapolygon') @@ -166,7 +172,7 @@ fetchSDA_spatial <- function(x, # SDA_query may generate a warning + try-error result chunk.res <- .fetchSDA_spatial(mukeys, geom.type, geom.src, use_statsgo, add.fields, - verbose, i) + verbose, i, by.col) # this almost always is because the query was too big # retry -- do each mukey individually @@ -175,12 +181,11 @@ fetchSDA_spatial <- function(x, subchunk.res <- lapply(mukeys, function(xx) { sub.res <- .fetchSDA_spatial(mukeys, geom.type, geom.src, use_statsgo, add.fields, - verbose, paste0(i,"_",xx)) + verbose, paste0(i, "_", xx), by.col) if (inherits(sub.res$result, 'try-error')) { # explicit handling for a hypothetical unqueryable single mukey - message("Symbol ", xx, " dropped from result due to error! May exceed the JSON serialization limit or have other topologic problems.", - call. = FALSE) + message("Symbol ", xx, " dropped from result due to error! May exceed the JSON serialization limit or have other topologic problems.") return(NULL) } return(sub.res) @@ -230,36 +235,54 @@ fetchSDA_spatial <- function(x, return(s) } -.fetchSDA_spatial <- function(mukey.list, geom.type, geom.src, use_statsgo, add.fields, verbose, .parentchunk = NA) { +.fetchSDA_spatial <- function(mukey.list, geom.type, geom.src, use_statsgo, add.fields, verbose, .parentchunk = NA, by.col) { + base.fields <- "P.mukey, legend.areasymbol, mapunit.nationalmusym" + if (geom.src == "mupolygon") { q <- sprintf( "SELECT - %s AS geom, - P.mukey, legend.areasymbol, mapunit.nationalmusym + %s AS geom, %s FROM %s AS P INNER JOIN mapunit ON P.mukey = mapunit.mukey INNER JOIN legend ON mapunit.lkey = legend.lkey - WHERE P.mukey IN %s %s", + WHERE P.mukey IN %s %s %s", geom.type, + ifelse(grepl("Aggregate", geom.type), + "(SELECT STRING_AGG(value,', ') FROM + (SELECT DISTINCT value FROM STRING_SPLIT(STRING_AGG(CONVERT(NVARCHAR(max), P.mukey), ','),',')) t) AS mukey, + (SELECT STRING_AGG(value,', ') FROM + (SELECT DISTINCT value FROM STRING_SPLIT(STRING_AGG(CONVERT(NVARCHAR(max), legend.areasymbol), ','),',')) t) AS areasymbol, + mapunit.nationalmusym", + "P.mukey, legend.areasymbol, mapunit.nationalmusym"), ifelse(use_statsgo, "gsmmupolygon", "mupolygon"), format_SQL_in_statement(mukey.list), - ifelse(use_statsgo, "AND CLIPAREASYMBOL = 'US'","") + ifelse(use_statsgo, "AND CLIPAREASYMBOL = 'US'",""), + ifelse(grepl("Aggregate", geom.type), + ifelse(by.col == "mukey", "GROUP BY P.mukey, mapunit.nationalmusym", "GROUP BY mapunit.nationalmusym"), "") ) } else if (geom.src == "sapolygon") { + + base.fields <- "legend.areasymbol" + q <- sprintf( "SELECT %s AS geom, legend.lkey, legend.areasymbol FROM sapolygon AS P INNER JOIN legend ON P.lkey = legend.lkey - WHERE legend.lkey IN %s", + WHERE legend.lkey IN %s %s", geom.type, - format_SQL_in_statement(mukey.list) + format_SQL_in_statement(mukey.list), + ifelse(grepl("Aggregate", geom.type), "GROUP BY legend.lkey, legend.areasymbol", "") ) } + # add any additional fields from mapunit/legend if (!is.null(add.fields)) { q <- gsub(q, pattern = "FROM ([a-z]+)polygon", - replacement = paste0(", ", paste0(add.fields, collapse = ", "), " FROM \\1polygon")) + replacement = paste0(", ", paste0(ifelse(rep(grepl("Aggregate", geom.type), length(add.fields)), + sprintf("(SELECT STRING_AGG(value,', ') FROM (SELECT DISTINCT value FROM STRING_SPLIT(STRING_AGG(CONVERT(NVARCHAR(max), %s), ','),',')) t) AS %s", + add.fields, gsub(".*\\.([a-z]+)", "\\1", add.fields)), + base.fields), collapse = ", "), " FROM \\1polygon")) } t1 <- Sys.time() diff --git a/man/fetchSDA_spatial.Rd b/man/fetchSDA_spatial.Rd index a726b12d..30f25fe0 100644 --- a/man/fetchSDA_spatial.Rd +++ b/man/fetchSDA_spatial.Rd @@ -21,7 +21,7 @@ fetchSDA_spatial( \item{by.col}{Column name containing map unit identifier \code{"mukey"}, \code{"nmusym"}/\code{"nationalmusym"} for \code{geom.src} \code{mupolygon} OR \code{"areasymbol"}, \code{"areaname"}, \code{"mlraoffice"}, \code{"mouagncyresp"} for \code{geom.src} \code{sapolygon}; default is determined by \code{is.numeric(x)} \code{TRUE} for \code{mukey} or \code{lkey} and \code{nationalmusym} or \code{areasymbol} otherwise.} -\item{method}{geometry result type: \code{"feature"} returns polygons, \code{"bbox"} returns the bounding box of each polygon (via \code{STEnvelope()}), and \code{"point"} returns a single point (via \code{STPointOnSurface()}) within each polygon.} +\item{method}{geometry result type: \code{"feature"} returns polygons, \code{"bbox"} returns the bounding box of each polygon (via \code{STEnvelope()}), \code{"point"} returns a single point (via \code{STPointOnSurface()}) within each polygon, \code{"extent"} returns an aggregate bounding box (the extent of all polygons, \code{geometry::EnvelopeAggregate()}) ), \code{"convexhull"} (\code{geometry::ConvexHullAggregate()}) returns the aggregate convex hull around all polygons, \code{"union"} (\code{geometry::UnionAggregate()}) and \code{"collection"} (\code{geometry::CollectionAggregate()}) return a \code{MULTIPOLYGON} or a \code{GEOMETRYCOLLECTION}, respectively, for each \code{mukey}, \code{nationalmusym}, or \code{areasymbol }. In the case of the latter four aggregation methods, the groups for aggregation depend on \code{by.col} (default by \code{"mukey"}).} \item{geom.src}{Either \code{mupolygon} (map unit polygons) or \code{sapolygon} (soil survey area boundary polygons)} diff --git a/tests/testthat/test-fetchSDA_spatial.R b/tests/testthat/test-fetchSDA_spatial.R index 96fc7690..cbfbf382 100644 --- a/tests/testthat/test-fetchSDA_spatial.R +++ b/tests/testthat/test-fetchSDA_spatial.R @@ -64,10 +64,16 @@ test_that("fetchSDA_spatial sapolygon and gsmmupolygon", { "Mariposa County Area, California", "Stanislaus County, California, Northern Part"))) - # test STATSGO mupolygon + # test STATSGO gsmmupolygon (5 bbox around 5 delineations) statsgo.bbox <- fetchSDA_spatial(660848, db = 'STATSGO', method = "bbox", - add.fields = c("mapunit.muname","legend.areaname")) + add.fields = c("mapunit.muname", "legend.areaname")) expect_equal(nrow(statsgo.bbox), 5) + + # test STATSGO gsmmupolygon (1 envelope around 5 delineations) + statsgo.bbox <- fetchSDA_spatial(660848, db = 'STATSGO', method = "envelope", + add.fields = c("mapunit.muname", "legend.areaname")) + expect_equal(nrow(statsgo.bbox), 1) + # skip_if_not_installed('sf') # suppressWarnings(requireNamespace("sf"))