From dea00329677e0049acb464191d260b3453415e31 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Fri, 11 Jun 2021 09:50:43 -0700 Subject: [PATCH] Updates to `get_SDA_*` / SOD-like methods (#185) * get_SDA_property: Vectorizing `property` argument, add argument to get query string * get_SDA_interpretation: Vectorizing `rulename` * get_SDA*: Standardize lower case MUKEY, COKEY, CHKEY, etc. * get_SDA_interpretations: consistent column names for all methods w/ .cleanRuleColumnName() * Demo of bottom_depth logic for weighted average, and comparing column names in results * removing conversion of NULL to 0, why was this in there * get_SDA_interpretation: add not_rated_value argument, default NA_real_ * Update docs Co-authored-by: Dylan Beaudette --- R/SDA_hydric.R | 24 +- R/SDA_interpretations.R | 276 +++++++------ R/SDA_properties.R | 377 +++++++++--------- man/get_SDA_interpretation.Rd | 25 +- man/get_SDA_property.Rd | 27 +- misc/getSDA_vectorize.R | 26 ++ .../soil-data-aggregation/get_SDA_SOD-tests.R | 40 ++ tests/testthat/test-SDA_interpretations.R | 28 +- tests/testthat/test-SDA_properties.R | 24 +- 9 files changed, 511 insertions(+), 336 deletions(-) create mode 100644 misc/getSDA_vectorize.R create mode 100644 misc/soil-data-aggregation/get_SDA_SOD-tests.R diff --git a/R/SDA_hydric.R b/R/SDA_hydric.R index b373748c..3ee4af87 100644 --- a/R/SDA_hydric.R +++ b/R/SDA_hydric.R @@ -24,28 +24,28 @@ where_clause <- switch(as.character(is.null(areasymbols)), "TRUE" = sprintf("mu.mukey IN %s", mukeys), "FALSE" = sprintf("l.areasymbol IN %s", areasymbols)) -q <- sprintf("SELECT AREASYMBOL, - MUSYM, - MUNAME, - mu.mukey/1 AS MUKEY, +q <- sprintf("SELECT areasymbol, + musym, + muname, + mu.mukey/1 AS mukey, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey) AS comp_count, + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey) AS comp_count, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey AND majcompflag = 'Yes') AS count_maj_comp, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey AND hydricrating = 'Yes' ) AS all_hydric, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey AND majcompflag = 'Yes' AND hydricrating = 'Yes') AS maj_hydric, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey AND majcompflag = 'Yes' AND hydricrating != 'Yes') AS maj_not_hydric, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit @@ -53,18 +53,18 @@ q <- sprintf("SELECT AREASYMBOL, AND majcompflag != 'Yes' AND hydricrating = 'Yes' ) AS hydric_inclusions, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey AND hydricrating != 'Yes') AS all_not_hydric, (SELECT TOP 1 COUNT_BIG(*) FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + INNER JOIN component ON component.mukey = mapunit.mukey AND mapunit.mukey = mu.mukey AND hydricrating IS NULL ) AS hydric_null INTO #main_query FROM legend AS l INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - SELECT AREASYMBOL, MUKEY, MUSYM, MUNAME, + SELECT areasymbol, mukey, musym, muname, CASE WHEN comp_count = all_not_hydric + hydric_null THEN 'Nonhydric' WHEN comp_count = all_hydric THEN 'Hydric' WHEN comp_count != all_hydric AND count_maj_comp = maj_hydric THEN 'Predominantly Hydric' diff --git a/R/SDA_interpretations.R b/R/SDA_interpretations.R index 7c38d7af..aede83d9 100644 --- a/R/SDA_interpretations.R +++ b/R/SDA_interpretations.R @@ -1,17 +1,34 @@ # Based on ssurgoOnDemand by chad ferguson and jason nemecek # SDA_interpretations.R: translation of SDA_Interps.py into soilDB-style R function by andrew brown -# last update: 2021/04/03 +# created: 2021/04/03 +# last update: 2021/05/30 #' Get map unit interpretations from Soil Data Access by rule name #' -#' @param rulename rule name of interpretation (matching a `mrulename` in `cointerp` table) +#' @param rulename character vector of interpretation rule names (matching `mrulename` in `cointerp` table) #' @param method aggregation method. One of: "Dominant Component", "Dominant Condition", "Weighted Average", "None". If "None" is selected one row will be returned per component, otherwise one row will be returned per map unit. #' @param areasymbols vector of soil survey area symbols #' @param mukeys vector of map unit keys -#' @details -#' +#' @param query_string Default: `FALSE`; if `TRUE` return a character string containing query that would be sent to SDA via `SDA_query` +#' @param not_rated_value used where rating class is "Not Rated". Default: `NA_real` +#' @examples +#' \donttest{ +#' if(requireNamespace("curl") & +#' curl::has_internet()) { +#' +#' # get two forestry interpretations for CA630 +#' get_SDA_interpretation(c("FOR - Potential Seedling Mortality", +#' "FOR - Road Suitability (Natural Surface)"), +#' method = "Dominant Condition", +#' areasymbols = "CA630") +#' } +#' } +#' +#' +#' @details +#' #' ## Rule Names in `cointerp` table -#' +#' #' - AGR-Agronomic Concerns (ND) #' - AGR-Available Water Capacity (ND) #' - AGR-Natural Fertility (ND) @@ -639,18 +656,21 @@ #' - WMS - Surface Drains (TX) #' - WMS - Surface Irrigation Intake Family (TX) #' - WMS - Surface Water Management, System -#' +#' #' @author Jason Nemecek, Chad Ferguson, Andrew Brown #' @return a data.frame #' @export #' @importFrom soilDB format_SQL_in_statement SDA_query -#' -get_SDA_interpretation <- function(rulename, - method = c("Dominant Component", - "Dominant Condition", - "Weighted Average", +#' +get_SDA_interpretation <- function(rulename, + method = c("Dominant Component", + "Dominant Condition", + "Weighted Average", "None"), - areasymbols = NULL, mukeys = NULL) { + areasymbols = NULL, + mukeys = NULL, + query_string = FALSE, + not_rated_value = NA_real_) { q <- .constructInterpQuery( method = method, interp = rulename, @@ -658,15 +678,28 @@ get_SDA_interpretation <- function(rulename, mukeys = mukeys ) + if (query_string) return(q) + # execute query - res <- soilDB::SDA_query(q) + res <- suppressMessages(soilDB::SDA_query(q)) # stop if bad if (inherits(res, 'try-error')) { warnings() stop(attr(res, 'condition')) } - + + # check rating column values + ratingcols <- colnames(res)[grep("^rating_", colnames(res))] + res[] <- lapply(colnames(res), function(x) { + y <- res[[x]] + if(x %in% ratingcols) { + # SQL will set 99 rating value for class == "Not rated" + y[is.na(y) | y == 99] <- not_rated_value + return(y) + } + y + }) return(res) } @@ -1037,106 +1070,56 @@ get_SDA_interpretation <- function(rulename, agg_method <- .interpretationAggMethod(method) areasymbols <- soilDB::format_SQL_in_statement(areasymbols) switch(agg_method$method, - "DOMINANT COMPONENT" = sprintf("SELECT areasymbol, musym, muname, mu.mukey AS MUKEY, - (SELECT interphr FROM component INNER JOIN cointerp ON component.cokey = cointerp.cokey AND component.cokey = c.cokey AND ruledepth = 0 AND mrulename LIKE '%s') as rating, - (SELECT interphrc FROM component INNER JOIN cointerp ON component.cokey = cointerp.cokey AND component.cokey = c.cokey AND ruledepth = 0 AND mrulename LIKE '%s') as class, - (SELECT DISTINCT SUBSTRING( ( SELECT ( '; ' + interphrc) - FROM mapunit - INNER JOIN component ON component.mukey=mu.mukey AND compkind != 'miscellaneous area' AND component.cokey=c.cokey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey - AND ruledepth != 0 AND interphrc NOT LIKE 'Not%%' AND mrulename LIKE '%s' GROUP BY interphrc, interphr - ORDER BY interphr DESC, interphrc - FOR XML PATH('') ), 3, 1000) )as reason - FROM legend AS l - INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - INNER JOIN component AS c ON c.mukey = mu.mukey AND c.cokey = (SELECT TOP 1 c1.cokey FROM component AS c1 - INNER JOIN mapunit ON c.mukey=mapunit.mukey AND c1.mukey=mu.mukey ORDER BY c1.comppct_r DESC, c1.cokey)", - interp, interp, interp, where_clause), - - "DOMINANT CONDITION" = sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS MUKEY, - (SELECT TOP 1 ROUND (AVG(interphr) over(partition by interphrc),2) - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' GROUP BY interphrc, interphr - ORDER BY SUM (comppct_r) DESC)as rating, - (SELECT TOP 1 interphrc - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' - GROUP BY interphrc, comppct_r ORDER BY SUM(comppct_r) over(partition by interphrc) DESC) as class, - - (SELECT DISTINCT SUBSTRING( ( SELECT ( '; ' + interphrc) - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND compkind != 'miscellaneous area' AND component.cokey=c.cokey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey - - AND ruledepth != 0 AND interphrc NOT LIKE 'Not%%' AND mrulename LIKE '%s' GROUP BY interphrc, interphr - ORDER BY interphr DESC, interphrc - FOR XML PATH('') ), 3, 1000) )as reason - - - FROM legend AS l - INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - INNER JOIN component AS c ON c.mukey = mu.mukey AND c.cokey = - (SELECT TOP 1 c1.cokey FROM component AS c1 - INNER JOIN mapunit ON c.mukey=mapunit.mukey AND c1.mukey=mu.mukey ORDER BY c1.comppct_r DESC, c1.cokey) - ORDER BY areasymbol, musym, muname, mu.mukey", - interp, interp, interp, where_clause), - - "WEIGHTED AVERAGE" = sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS MUKEY, - (SELECT TOP 1 CASE WHEN ruledesign = 1 THEN 'limitation' - WHEN ruledesign = 2 THEN 'suitability' END - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' - GROUP BY mapunit.mukey, ruledesign) as design, - ROUND ((SELECT SUM (interphr * comppct_r) - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' - GROUP BY mapunit.mukey),2) as rating, - ROUND ((SELECT SUM (comppct_r) - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' - AND (interphr) IS NOT NULL GROUP BY mapunit.mukey),2) as sum_com, - (SELECT DISTINCT SUBSTRING( ( SELECT ( '; ' + interphrc) - FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND compkind != 'miscellaneous area' - INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey + "DOMINANT COMPONENT" = .interpretation_aggregation(interp, where_clause, dominant = TRUE), + "DOMINANT CONDITION" = .interpretation_by_condition(interp, where_clause, dominant = TRUE), + "WEIGHTED AVERAGE" = .interpretation_weighted_average(interp, where_clause), + "NONE" = .interpretation_aggregation(interp, where_clause) + ) +} - AND ruledepth != 0 AND interphrc NOT LIKE 'Not%%' AND mrulename LIKE '%s' GROUP BY interphrc - ORDER BY interphrc - FOR XML PATH('') ), 3, 1000) )as reason +.cleanRuleColumnName <- function(x) gsub("[^A-Za-z0-9]", "", x) - INTO #main - FROM legend AS l - INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - INNER JOIN component AS c ON c.mukey = mu.mukey - GROUP BY areasymbol, musym, muname, mu.mukey +.interpretation_by_condition <- function(interp, where_clause, dominant = TRUE) { + sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, + %s + FROM legend AS l + INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s + INNER JOIN component AS c ON c.mukey = mu.mukey %s + ORDER BY areasymbol, musym, muname, mu.mukey", + paste0(sapply(interp, function(x) sprintf(" (SELECT TOP 1 ROUND (AVG(interphr) OVER (PARTITION BY interphrc), 2) + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' GROUP BY interphrc, interphr + ORDER BY SUM (comppct_r) DESC) AS [rating_%s], + (SELECT TOP 1 interphrc + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' + GROUP BY interphrc, comppct_r ORDER BY SUM(comppct_r) OVER (PARTITION BY interphrc) DESC) AS [class_%s], - SELECT areasymbol, musym, muname, MUKEY, ISNULL (ROUND ((rating/sum_com),2), 99) AS rating, - CASE WHEN rating IS NULL THEN 'Not Rated' - WHEN design = 'suitability' AND ROUND ((rating/sum_com),2) < = 0 THEN 'Not suited' - WHEN design = 'suitability' AND ROUND ((rating/sum_com),2) > 0.001 and ROUND ((rating/sum_com),2) <=0.333 THEN 'Poorly suited' - WHEN design = 'suitability' AND ROUND ((rating/sum_com),2) > 0.334 and ROUND ((rating/sum_com),2) <=0.666 THEN 'Moderately suited' - WHEN design = 'suitability' AND ROUND ((rating/sum_com),2) > 0.667 and ROUND ((rating/sum_com),2) <=0.999 THEN 'Moderately well suited' - WHEN design = 'suitability' AND ROUND ((rating/sum_com),2) = 1 THEN 'Well suited' + (SELECT DISTINCT SUBSTRING((SELECT('; ' + interphrc) + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey AND compkind != 'miscellaneous area' AND component.cokey = c.cokey + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey + AND ruledepth != 0 AND interphrc NOT LIKE 'Not%%' AND mrulename LIKE '%s' GROUP BY interphrc, interphr + ORDER BY interphr DESC, interphrc + FOR XML PATH('') ), 3, 1000)) AS [reason_%s]", + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x))), + collapse = ", "), where_clause, + ifelse(dominant, "AND c.cokey = + (SELECT TOP 1 c1.cokey FROM component AS c1 + INNER JOIN mapunit ON c.mukey = mapunit.mukey AND c1.mukey = mu.mukey ORDER BY c1.comppct_r DESC, c1.cokey)", "")) +} - WHEN design = 'limitation' AND ROUND ((rating/sum_com),2) < = 0 THEN 'Not limited ' - WHEN design = 'limitation' AND ROUND ((rating/sum_com),2) > 0.001 and ROUND ((rating/sum_com),2) <=0.333 THEN 'Slightly limited ' - WHEN design = 'limitation' AND ROUND ((rating/sum_com),2) > 0.334 and ROUND ((rating/sum_com),2) <=0.666 THEN 'Somewhat limited ' - WHEN design = 'limitation' AND ROUND ((rating/sum_com),2) > 0.667 and ROUND ((rating/sum_com),2) <=0.999 THEN 'Moderately limited ' - WHEN design = 'limitation' AND ROUND ((rating/sum_com),2) = 1 THEN 'Very limited' END AS class, reason - FROM #main - DROP TABLE #main", interp, interp, interp, interp, where_clause), - - "NONE" = sprintf("SELECT areasymbol, musym, muname, mu.mukey AS MUKEY, c.cokey AS cokey, c.compname AS compname, c.comppct_r AS comppct_r, +.interpretation_aggregation <- function(interp, where_clause, dominant = FALSE) { + sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, c.cokey AS cokey, c.compname AS compname, c.comppct_r AS comppct_r, %s FROM legend AS l INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - INNER JOIN component AS c ON c.mukey = mu.mukey", - paste0(sapply(interp, function(x) sprintf("(SELECT interphr FROM component INNER JOIN cointerp ON component.cokey = cointerp.cokey AND component.cokey = c.cokey AND ruledepth = 0 AND mrulename LIKE '%s') as [rating_%s], + INNER JOIN component AS c ON c.mukey = mu.mukey %s", + paste0(sapply(interp, function(x) sprintf("(SELECT interphr FROM component INNER JOIN cointerp ON component.cokey = cointerp.cokey AND component.cokey = c.cokey AND ruledepth = 0 AND mrulename LIKE '%s') as [rating_%s], (SELECT interphrc FROM component INNER JOIN cointerp ON component.cokey = cointerp.cokey AND component.cokey = c.cokey AND ruledepth = 0 AND mrulename LIKE '%s') as [class_%s], (SELECT DISTINCT SUBSTRING( ( SELECT ( '; ' + interphrc) FROM mapunit @@ -1144,12 +1127,77 @@ get_SDA_interpretation <- function(rulename, INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth != 0 AND interphrc NOT LIKE 'Not%%' AND mrulename LIKE '%s' GROUP BY interphrc, interphr ORDER BY interphr DESC, interphrc - FOR XML PATH('') ), 3, 1000)) as [reason_%s]", - x, .cleanRuleColumnName(x), - x, .cleanRuleColumnName(x), - x, .cleanRuleColumnName(x))), - collapse = ", "), where_clause) - ) + FOR XML PATH('') ), 3, 1000)) as [reason_%s]", + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x))), + collapse = ", "), where_clause, + ifelse(dominant, "AND c.cokey = (SELECT TOP 1 c1.cokey FROM component AS c1 + INNER JOIN mapunit ON c.mukey = mapunit.mukey AND c1.mukey = mu.mukey + ORDER BY c1.comppct_r DESC, c1.cokey)", "")) } -.cleanRuleColumnName <- function(x) gsub("[^A-Za-z0-9]", "", x) +.interpretation_weighted_average <- function(interp, where_clause) { + sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, + %s + INTO #main + FROM legend AS l + INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s + INNER JOIN component AS c ON c.mukey = mu.mukey + GROUP BY areasymbol, musym, muname, mu.mukey + SELECT areasymbol, musym, muname, mukey, + %s, + %s, + %s + FROM #main + DROP TABLE #main", + paste0(sapply(interp, function(x) sprintf("(SELECT TOP 1 CASE WHEN ruledesign = 1 THEN 'limitation' + WHEN ruledesign = 2 THEN 'suitability' END + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' + GROUP BY mapunit.mukey, ruledesign) AS [design_%s], + ROUND ((SELECT SUM (interphr * comppct_r) + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' + GROUP BY mapunit.mukey),2) AS [rating_%s], + ROUND ((SELECT SUM (comppct_r) + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey AND ruledepth = 0 AND mrulename LIKE '%s' + AND (interphr) IS NOT NULL GROUP BY mapunit.mukey),2) AS [sum_com_%s], + (SELECT DISTINCT SUBSTRING((SELECT ( '; ' + interphrc) + FROM mapunit + INNER JOIN component ON component.mukey = mapunit.mukey AND compkind != 'miscellaneous area' + INNER JOIN cointerp ON component.cokey = cointerp.cokey AND mapunit.mukey = mu.mukey + AND ruledepth != 0 AND interphrc NOT LIKE 'Not%%' AND mrulename LIKE '%s' GROUP BY interphrc + ORDER BY interphrc + FOR XML PATH('') ), 3, 1000)) AS [reason_%s]", + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x), + x, .cleanRuleColumnName(x))), collapse=", "), + where_clause, + paste0(sapply(interp, + function(x) sprintf("ISNULL(ROUND(([rating_%s] / [sum_com_%s]),2), 99) AS [rating_%s]", + .cleanRuleColumnName(x), .cleanRuleColumnName(x), .cleanRuleColumnName(x))), + collapse = ", "), + paste0(sapply(interp, + function(x) sprintf(gsub("design", paste0("[design_", .cleanRuleColumnName(x),"]"), + gsub("sum_com", paste0("[sum_com_", .cleanRuleColumnName(x), "]"), + gsub("rating", paste0("[rating_", .cleanRuleColumnName(x), "]"), + "CASE WHEN rating IS NULL THEN 'Not Rated' + WHEN design = 'suitability' AND ROUND((rating/sum_com),2) <= 0 THEN 'Not suited' + WHEN design = 'suitability' AND ROUND((rating/sum_com),2) > 0.001 and ROUND((rating/sum_com),2) <=0.333 THEN 'Poorly suited' + WHEN design = 'suitability' AND ROUND((rating/sum_com),2) > 0.334 and ROUND((rating/sum_com),2) <=0.666 THEN 'Moderately suited' + WHEN design = 'suitability' AND ROUND((rating/sum_com),2) > 0.667 and ROUND((rating/sum_com),2) <=0.999 THEN 'Moderately well suited' + WHEN design = 'suitability' AND ROUND((rating/sum_com),2) = 1 THEN 'Well suited' + WHEN design = 'limitation' AND ROUND((rating/sum_com),2) <= 0 THEN 'Not limited' + WHEN design = 'limitation' AND ROUND((rating/sum_com),2) > 0.001 and ROUND((rating/sum_com),2) <=0.333 THEN 'Slightly limited' + WHEN design = 'limitation' AND ROUND((rating/sum_com),2) > 0.334 and ROUND((rating/sum_com),2) <=0.666 THEN 'Somewhat limited' + WHEN design = 'limitation' AND ROUND((rating/sum_com),2) > 0.667 and ROUND((rating/sum_com),2) <=0.999 THEN 'Moderately limited' + WHEN design = 'limitation' AND ROUND((rating/sum_com),2) = 1 THEN 'Very limited' END AS [class_%s]"))), + .cleanRuleColumnName(x))), + collapse = ", "), paste0(sapply(interp, function(x) sprintf("[reason_%s]", .cleanRuleColumnName(x))), collapse = ", ")) +} diff --git a/R/SDA_properties.R b/R/SDA_properties.R index aee8e56a..90a20c87 100644 --- a/R/SDA_properties.R +++ b/R/SDA_properties.R @@ -1,22 +1,40 @@ # Based on ssurgoOnDemand by chad ferguson and jason nemecek # SDA_properties.R: translation of SDA_Properties.py into soilDB-style R function by andrew brown -# last update: 2021/04/03 +# created: 2021/04/03 +# last update: 2021/05/30 #' Get map unit properties from Soil Data Access #' -#' @param property a label or column name from property dictionary +#' @param property character vector of labels from property dictionary tables (see details) OR physical column names from `component` or `chorizon` table. #' @param method one of: "Dominant Component (Category)", "Weighted Average", "Min/Max", "Dominant Component (Numeric)", "Dominant Condition", or "None". If "None" is selected, the number of rows returned will depend on whether a component or horizon level property was selected, otherwise the result will be 1:1 with the number of map units. #' @param areasymbols vector of soil survey area symbols #' @param mukeys vector of map unit keys #' @param top_depth Default: `0` (centimeters); a numeric value for upper boundary (top depth) used only for method="weighted average" and "dominant component (numeric)" #' @param bottom_depth Default: `200` (centimeters); a numeric value for lower boundary (bottom depth) used only for method="weighted average" and "dominant component (numeric)" -#' @param FUN Optional: character representing SQL aggregation function either "MIN" or "MAX" for method="min/max" -#' @details -#' -#' The `property` argument refers to one of the property names or columns specified in the tables below. -#' +#' @param FUN Optional: character representing SQL aggregation function either "MIN" or "MAX" used only for method="min/max" +#' @param query_string Default: `FALSE`; if `TRUE` return a character string containing query that would be sent to SDA via `SDA_query` +#' @examples +#' +#' \donttest{ +#' if(requireNamespace("curl") & +#' curl::has_internet()) { +#' +#' # get 1/3 bar bulk density [0,25] centimeter depth weighted average from dominant component +#' get_SDA_property(property = c("dbthirdbar_l","dbthirdbar_r","dbthirdbar_h"), +#' method = "Dominant Component (Numeric)", +#' areasymbols = "CA630", +#' top_depth = 0, +#' bottom_depth = 25) +#' } +#' } +#' +#' @details +#' +#' The `property` argument refers to one of the property names or columns specified in the tables below. Note that `property` can be specified as either a character vector of labeled properties, such as `"Bulk Density 0.33 bar H2O - Rep Value"`, OR physical column names such as `"dbthirdbar_r"`. To get "low" and "high" values for a particular property, replace the `_r` with `_l` or `_h` in the physical column name; for example `property = c("dbthirdbar_l","dbthirdbar_r","dbthirdbar_h")`. You can view exhaustive lists of component and component horizon level properties in the Soil Data Access ["Tables and Columns Report"](https://sdmdataaccess.sc.egov.usda.gov/documents/TablesAndColumnsReport.pdf). +#' +#' #' ## Selected Component-level Properties -#' +#' #' |**Property (Component)** |**Column** | #' |:------------------------------------------------|:------------------| #' |Range Production - Favorable Year |rsprod_h | @@ -33,9 +51,9 @@ #' |Wind Erodibility Group |weg | #' |Wind Erodibility Index |wei | #' |t Factor |tfact | -#' +#' #' ## Selected Horizon-level Properties -#' +#' #' |**Property (Horizon)** |**Column** | #' |:------------------------------------------------|:------------------| #' |0.1 bar H2O - Rep Value |wtenthbar_r | @@ -104,13 +122,14 @@ get_SDA_property <- function(property, # property -- a label or column name from property dictionary method = c("Dominant Component (Category)", "Weighted Average", - "Min/Max", "Dominant Component (Numeric)", "Dominant Condition", + "Min/Max", "Dominant Component (Numeric)", "Dominant Condition", "None"), areasymbols = NULL, # vector of areasymbols mukeys = NULL, # vector of mukeys top_depth = 0, # used for method="weighted average" and "dominant component (numeric)" bottom_depth = 200, # used for method="weighted average" and "dominant component (numeric)" - FUN = NULL) # used for method="min/max" + FUN = NULL, + query_string = FALSE) # used for method="min/max" { @@ -118,12 +137,14 @@ get_SDA_property <- property = property, areasymbols = areasymbols, mukeys = mukeys, - tDep = top_depth, - bDep = bottom_depth, - mmC = FUN) + top_depth = top_depth, + bottom_depth = bottom_depth, + FUN = FUN) + + if (query_string) return(q) # execute query - res <- soilDB::SDA_query(q) + res <- suppressMessages(soilDB::SDA_query(q)) # stop if bad if (inherits(res, 'try-error')) { @@ -131,8 +152,6 @@ get_SDA_property <- stop(attr(res, 'condition')) } - # TODO: use #aMethod$modifier on res? - return(res) } @@ -183,7 +202,7 @@ get_SDA_property <- 'Corrosion of Concrete' = 'corcon', 'Drainage Class' = 'drainagecl', 'Effective Cation Exchange Capcity - Rep Value' = 'ecec_r', - 'Electrial Conductivity 1:5 by volume - Rep Value' = 'ec15_r', + 'Electrical Conductivity 1:5 by volume - Rep Value' = 'ec15_r', 'Electrical Conductivity - Rep Value' = 'ec_r', 'Exchangeable Sodium Percentage - Rep Value' = 'esp_r', 'Extract Aluminum - Rep Value' = 'extral_r', @@ -240,155 +259,217 @@ get_SDA_property <- .constructPropQuery <- function(method, property, areasymbols = NULL, mukeys = NULL, - tDep = 0, bDep = 200, mmC = NULL) { + top_depth = 0, bottom_depth = 200, FUN = NULL) { # SQL by Jason Nemecek - - stopifnot(!is.null(areasymbols) | !is.null(mukeys)) + # must specify either mukeys or areasymbols + stopifnot(!is.null(areasymbols) || !is.null(mukeys)) + + # convert target mukey/areasymbol to SQL IN statement ('a','b','c') if (!is.null(areasymbols)) areasymbols <- soilDB::format_SQL_in_statement(areasymbols) if (!is.null(mukeys)) mukeys <- soilDB::format_SQL_in_statement(mukeys) + # if areasymbols is specified, it is used preferentially, otherwise mukeys where_clause <- switch(as.character(is.null(areasymbols)), "TRUE" = sprintf("mu.mukey IN %s", mukeys), "FALSE" = sprintf("l.areasymbol IN %s", areasymbols)) - + # check property, case insensitive, against dictionary + # user can also specify columns that aren't in the dictionary using physical column name property_up <- toupper(property) lut <- .propertyDictionary() names(lut) <- toupper(names(lut)) agg_property <- lut[property_up] - - not_in_lut <- sapply(agg_property, is.null) - - # if they are all not in lookup table, assume user knows what they are doing + + # check whether properties are in dictionary + not_in_lut <- sapply(agg_property, function(x) is.null(x) || is.na(x)) + + # if they are all not in dictionary, assume user knows what they are doing # this means you can't mix column name input and readable label input in same call if (all(not_in_lut)) { - + ## strict: only allow properties from the lookup table # names(lut) <- toupper(.propertyDictionary()) # agg_property <- lut[property] # if(any(is.null(agg_property))) stop("property must be a label or column name from SDA property dictionary (.propertyDictonary())", call. = FALSE) - - ## alternate: just assume they are either all component or all horizon column names + + ## alternate: just assume they are either all component or all horizon column names # message('assuming `property` is a vector of component OR horizon-level column names') agg_property <- property - + } else { - + # remove non-matching if using lookup table labels agg_property <- agg_property[!not_in_lut] } - + + if (!is.character(method)) + stop('argument `method` should be character string containing aggregation method, or `"NONE"` for no aggregation', call. = FALSE) + method <- toupper(method) - + if (method == "NONE") if (all(agg_property %in% colnames(suppressMessages(SDA_query("SELECT TOP 1 * FROM chorizon"))))) method <- "NONE_HORIZON" - - mmC <- toupper(mmC) - # check mmC arg for min max method + FUN <- toupper(FUN) + + # check FUN arg for min max method if (method == "MIN/MAX") { - mmC <- match.arg(mmC, c("MIN","MAX")) - # handle shorthand min/max for mmC passed as method + FUN <- match.arg(FUN, c("MIN", "MAX")) + # handle shorthand min/max for FUN passed as method } else if (method == "MAX") { method <- "MIN/MAX" - mmC <- "MAX" + FUN <- "MAX" } else if (method == "MIN") { method <- "MIN/MAX" - mmC <- "MIN" + FUN <- "MIN" } + # determine label and column prefix/suffix for selected method agg_method <- .propertyAggMethod(method) - switch(toupper(agg_method$method), - # dominant component (category) - "DOMINANT COMPONENT (CATEGORY)" = - sprintf("SELECT areasymbol, musym, muname, mu.mukey AS mukey, %s AS %s - FROM legend AS l - INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - INNER JOIN component AS c ON c.mukey = mu.mukey AND - c.cokey = (SELECT TOP 1 c1.cokey FROM component AS c1 - INNER JOIN mapunit ON c.mukey=mapunit.mukey AND c1.mukey=mu.mukey ORDER BY c1.comppct_r DESC, c1.cokey)", + # handle top_depth / bottom_depth mis-specified + if (agg_method$method %in% c("WEIGHTED AVERAGE","DOMINANT COMPONENT (NUMERIC)")) { + if (!all(c(is.numeric(top_depth), is.numeric(bottom_depth))) || + any(c(is.na(top_depth), is.na(bottom_depth)))) { + stop("`top_depth` and `bottom_depth` must be numeric, non-NA depths in centimeters for method='weighted average' or 'dominant component (numeric)'", + call. = FALSE) + } + } + + # define several helper methods + .property_dominant_condition_category <- function(property) { + sprintf("(SELECT TOP 1 %s FROM mapunit + INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey + GROUP BY %s, comppct_r ORDER BY SUM(comppct_r) over(partition by %s) DESC) AS %s", + property, property, property, property) + } + + .property_min_max <- function(property, FUN) { + sprintf("(SELECT TOP 1 %s (chm1.%s) FROM component AS cm1 + INNER JOIN chorizon AS chm1 ON cm1.cokey = chm1.cokey AND + cm1.cokey = c.cokey + AND CASE + WHEN chm1.hzname LIKE '%%O%%' AND hzdept_r <10 THEN 2 + WHEN chm1.hzname LIKE '%%r%%' THEN 2 + WHEN chm1.hzname LIKE '%%' THEN 1 ELSE 1 END = 1) AS %s", + FUN, property, property) + } + + .property_dominant_component_numeric <- function(property, top_depth, bottom_depth, where_clause) { + # dominant component numeric is a more specific case of weighted average + .property_weighted_average(property, top_depth, bottom_depth, where_clause, dominant = TRUE) + } + + .property_weighted_average <- function(property, top_depth, bottom_depth, where_clause, dominant = FALSE) { - agg_property, agg_property, - where_clause), - # weighted average - "WEIGHTED AVERAGE" = sprintf("SELECT areasymbol, musym, muname, mukey + n <- 1:length(property) + stopifnot(n > 0) + sprintf("SELECT areasymbol, musym, muname, mukey INTO #kitchensink - FROM legend AS lks - INNER JOIN mapunit AS muks ON muks.lkey = lks.lkey AND %s + FROM legend AS lks + INNER JOIN mapunit AS muks ON muks.lkey = lks.lkey AND %s SELECT mu1.mukey, cokey, comppct_r, SUM (comppct_r) over(partition by mu1.mukey ) AS SUM_COMP_PCT INTO #comp_temp FROM legend AS l1 INNER JOIN mapunit AS mu1 ON mu1.lkey = l1.lkey AND %s INNER JOIN component AS c1 ON c1.mukey = mu1.mukey AND majcompflag = 'Yes' + %s SELECT cokey, SUM_COMP_PCT, CASE WHEN comppct_r = SUM_COMP_PCT THEN 1 ELSE CAST (CAST (comppct_r AS decimal (5,2)) / CAST (SUM_COMP_PCT AS decimal (5,2)) AS decimal (5,2)) END AS WEIGHTED_COMP_PCT INTO #comp_temp3 FROM #comp_temp - SELECT - areasymbol, musym, muname, mu.mukey/1 AS MUKEY, c.cokey AS COKEY, ch.chkey/1 AS CHKEY, compname, hzname, hzdept_r, hzdepb_r, CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END AS hzdept_r_ADJ, + SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, c.cokey AS cokey, ch.chkey/1 AS chkey, compname, hzname, hzdept_r, hzdepb_r, CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END AS hzdept_r_ADJ, CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END AS hzdepb_r_ADJ, - CAST (CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END - CASE WHEN hzdept_r <%s THEN %s ELSE hzdept_r END AS decimal (5,2)) AS thickness, + CAST (CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END - CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END AS decimal (5,2)) AS thickness, comppct_r, CAST (SUM (CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END - CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END) over(partition by c.cokey) AS decimal (5,2)) AS sum_thickness, - CAST (ISNULL (%s, 0) AS decimal (5,2)) AS %s + %s INTO #main FROM legend AS l INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s INNER JOIN component AS c ON c.mukey = mu.mukey INNER JOIN chorizon AS ch ON ch.cokey=c.cokey AND hzname NOT LIKE '%%O%%' AND hzname NOT LIKE '%%r%%' - AND hzdepb_r > %s AND hzdept_r <%s - INNER JOIN chtexturegrp AS cht ON ch.chkey=cht.chkey WHERE - cht.rvindicator = 'yes' AND ch.hzdept_r IS NOT NULL - AND texture NOT LIKE '%%PM%%' and texture NOT LIKE '%%DOM%%' and texture NOT LIKE '%%MPT%%' and texture NOT LIKE '%%MUCK%%' and texture NOT LIKE '%%PEAT%%' and texture NOT LIKE '%%br%%' and texture NOT LIKE '%%wb%%' + AND hzdepb_r > %s AND hzdept_r < %s + INNER JOIN chtexturegrp AS cht ON ch.chkey=cht.chkey WHERE cht.rvindicator = 'yes' AND ch.hzdept_r IS NOT NULL + AND + texture NOT LIKE '%%PM%%' and texture NOT LIKE '%%DOM' and texture NOT LIKE '%%MPT%%' and texture NOT LIKE '%%MUCK' and texture NOT LIKE '%%PEAT%%' and texture NOT LIKE '%%br%%' and texture NOT LIKE '%%wb%%' ORDER BY areasymbol, musym, muname, mu.mukey, comppct_r DESC, cokey, hzdept_r, hzdepb_r - SELECT #main.areasymbol, #main.musym, #main.muname, #main.MUKEY, - #main.COKEY, #main.CHKEY, #main.compname, hzname, hzdept_r, hzdepb_r, hzdept_r_ADJ, hzdepb_r_ADJ, thickness, sum_thickness, %s, comppct_r, SUM_COMP_PCT, WEIGHTED_COMP_PCT , - SUM((thickness/sum_thickness ) * %s) over (partition by #main.COKEY)AS COMP_WEIGHTED_AVERAGE - INTO #comp_temp2 - FROM #main - INNER JOIN #comp_temp3 ON #comp_temp3.cokey=#main.cokey - ORDER BY #main.areasymbol, #main.musym, #main.muname, #main.MUKEY, comppct_r DESC, #main.COKEY, hzdept_r, hzdepb_r - SELECT #comp_temp2.MUKEY,#comp_temp2.COKEY, WEIGHTED_COMP_PCT * COMP_WEIGHTED_AVERAGE AS COMP_WEIGHTED_AVERAGE1 - INTO #last_step - FROM #comp_temp2 - GROUP BY #comp_temp2.MUKEY,#comp_temp2.COKEY, WEIGHTED_COMP_PCT, COMP_WEIGHTED_AVERAGE - SELECT areasymbol, musym, muname, - #kitchensink.mukey, #last_step.COKEY, - CAST (SUM (COMP_WEIGHTED_AVERAGE1) over(partition by #kitchensink.mukey) as decimal(5,2)) AS %s - INTO #last_step2 - FROM #last_step - RIGHT OUTER JOIN #kitchensink ON #kitchensink.mukey=#last_step.mukey - GROUP BY #kitchensink.areasymbol, #kitchensink.musym, #kitchensink.muname, #kitchensink.mukey, COMP_WEIGHTED_AVERAGE1, #last_step.COKEY - ORDER BY #kitchensink.areasymbol, #kitchensink.musym, #kitchensink.muname, #kitchensink.mukey - SELECT #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, - #last_step2.mukey, #last_step2.%s - FROM #last_step2 - LEFT OUTER JOIN #last_step ON #last_step.mukey=#last_step2.mukey - GROUP BY #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, #last_step2.%s - ORDER BY #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, #last_step2.%s", - gsub("^(l|mu)\\.","\\1ks.",where_clause), gsub("^(l|mu)\\.","\\11.",where_clause), - tDep, tDep, bDep, bDep, bDep, bDep, tDep, tDep, bDep, bDep, tDep, tDep, - agg_property, agg_property, + %s", + gsub("^(l|mu)\\.","\\1ks.", where_clause), + gsub("^(l|mu)\\.","\\11.", where_clause), + ifelse(dominant, " AND c1.cokey = (SELECT TOP 1 c2.cokey FROM component AS c2 + INNER JOIN mapunit AS mm1 ON + c2.mukey = mm1.mukey AND c2.mukey = mu1.mukey + ORDER BY c2.comppct_r DESC, c2.cokey)", ""), + top_depth, top_depth, + bottom_depth, bottom_depth, + bottom_depth, bottom_depth, + top_depth, top_depth, + bottom_depth, bottom_depth, + top_depth, top_depth, + paste0(sprintf("CAST (%s AS decimal (5,2)) AS %s", property, property), collapse=", "), where_clause, - tDep, bDep, - agg_property,agg_property,agg_property,agg_property,agg_property,agg_property), + top_depth, bottom_depth, + sprintf("SELECT #main.areasymbol, #main.musym, #main.muname, #main.mukey, +#main.cokey, #main.chkey, #main.compname, hzname, hzdept_r, hzdepb_r, hzdept_r_ADJ, hzdepb_r_ADJ, thickness, sum_thickness, %s, comppct_r, SUM_COMP_PCT, WEIGHTED_COMP_PCT, %s + INTO #comp_temp2 + FROM #main + INNER JOIN #comp_temp3 ON #comp_temp3.cokey=#main.cokey + ORDER BY #main.areasymbol, #main.musym, #main.muname, #main.mukey, + comppct_r DESC, #main.cokey, hzdept_r, hzdepb_r + SELECT #comp_temp2.mukey, #comp_temp2.cokey, %s + INTO #last_step + FROM #comp_temp2 + GROUP BY #comp_temp2.mukey, #comp_temp2.cokey, WEIGHTED_COMP_PCT, %s + SELECT areasymbol, musym, muname, #kitchensink.mukey, #last_step.cokey, %s + INTO #last_step2 + FROM #last_step + RIGHT OUTER JOIN #kitchensink ON #kitchensink.mukey = #last_step.mukey + GROUP BY #kitchensink.areasymbol, #kitchensink.musym, #kitchensink.muname, #kitchensink.mukey, %s, #last_step.cokey + ORDER BY #kitchensink.areasymbol, #kitchensink.musym, #kitchensink.muname, #kitchensink.mukey + SELECT #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, %s + FROM #last_step2 + LEFT OUTER JOIN #last_step ON #last_step.mukey = #last_step2.mukey + GROUP BY #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, %s + ORDER BY #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, %s", +paste0(property, collapse = ", "), +paste0(sprintf("SUM((thickness/sum_thickness) * %s) OVER (PARTITION BY #main.cokey) AS DEPTH_WEIGHTED_AVERAGE%s", + property, n), collapse = ", "), +paste0(sprintf("WEIGHTED_COMP_PCT * DEPTH_WEIGHTED_AVERAGE%s AS COMP_WEIGHTED_AVERAGE%s", n, n), collapse = ", "), +paste0(sprintf("DEPTH_WEIGHTED_AVERAGE%s", n), collapse = ", "), +paste0(sprintf("CAST (SUM (COMP_WEIGHTED_AVERAGE%s) OVER (PARTITION BY #kitchensink.mukey) AS decimal(5,2)) AS %s", + n, property), collapse= ", "), +paste0(sprintf("COMP_WEIGHTED_AVERAGE%s", n), collapse = ", "), +paste0(sprintf("#last_step2.%s", property), collapse = ", "), +paste0(sprintf("#last_step2.%s", property), collapse = ", "), +paste0(sprintf("#last_step2.%s", property), collapse = ", "))) + } + + # create query based on method + switch(toupper(agg_method$method), + # dominant component (category) + "DOMINANT COMPONENT (CATEGORY)" = + sprintf("SELECT areasymbol, musym, muname, mu.mukey AS mukey, %s + FROM legend AS l + INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s + INNER JOIN component AS c ON c.mukey = mu.mukey AND + c.cokey = (SELECT TOP 1 c1.cokey FROM component AS c1 + INNER JOIN mapunit ON c.mukey=mapunit.mukey AND c1.mukey=mu.mukey ORDER BY c1.comppct_r DESC, c1.cokey)", + + paste0(sapply(agg_property, function(x) sprintf("%s AS %s", x, x)), collapse = ", "), + where_clause), + + # weighted average (.weighted_average handles vector agg_property) + "WEIGHTED AVERAGE" = .property_weighted_average(agg_property, top_depth, bottom_depth, where_clause), "MIN/MAX" = - sprintf("SELECT areasymbol, musym, muname, mu.mukey AS mukey, - (SELECT TOP 1 %s (chm1.%s) FROM component AS cm1 - INNER JOIN chorizon AS chm1 ON cm1.cokey = chm1.cokey AND - cm1.cokey = c.cokey - AND CASE - WHEN chm1.hzname LIKE '%%O%%' AND hzdept_r <10 THEN 2 - WHEN chm1.hzname LIKE '%%r%%' THEN 2 - WHEN chm1.hzname LIKE '%%' THEN 1 ELSE 1 END = 1) AS %s + sprintf("SELECT areasymbol, musym, muname, mu.mukey AS mukey, %s FROM legend AS l INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s INNER JOIN component AS c ON c.mukey = mu.mukey AND @@ -396,80 +477,15 @@ get_SDA_property <- INNER JOIN mapunit ON c.mukey = mapunit.mukey AND c1.mukey = mu.mukey ORDER BY c1.comppct_r DESC, c1.cokey)", - mmC, agg_property, agg_property, where_clause), + paste0(sapply(agg_property, function(x) .property_min_max(x, FUN = FUN)), collapse = ", "), + where_clause), - # dominant component (numeric) - "DOMINANT COMPONENT (NUMERIC)" = sprintf("SELECT areasymbol, musym, muname, mukey - INTO #kitchensink - FROM legend AS lks - INNER JOIN mapunit AS muks ON muks.lkey = lks.lkey AND %s - SELECT mu1.mukey, cokey, comppct_r, - SUM (comppct_r) over(partition by mu1.mukey ) AS SUM_COMP_PCT - INTO #comp_temp - FROM legend AS l1 - INNER JOIN mapunit AS mu1 ON mu1.lkey = l1.lkey AND %s - INNER JOIN component AS c1 ON c1.mukey = mu1.mukey AND majcompflag = 'Yes' - AND c1.cokey = - (SELECT TOP 1 c2.cokey FROM component AS c2 - INNER JOIN mapunit AS mm1 ON c2.mukey=mm1.mukey AND c2.mukey=mu1.mukey ORDER BY c2.comppct_r DESC, c2.cokey) - SELECT cokey, SUM_COMP_PCT, CASE WHEN comppct_r = SUM_COMP_PCT THEN 1 - ELSE CAST (CAST (comppct_r AS decimal (5,2)) / CAST (SUM_COMP_PCT AS decimal (5,2)) AS decimal (5,2)) END AS WEIGHTED_COMP_PCT - INTO #comp_temp3 - FROM #comp_temp - SELECT areasymbol, musym, muname, mu.mukey/1 AS MUKEY, c.cokey AS COKEY, ch.chkey/1 AS CHKEY, compname, hzname, hzdept_r, hzdepb_r, CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END AS hzdept_r_ADJ, - CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END AS hzdepb_r_ADJ, - CAST (CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END - CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END AS decimal (5,2)) AS thickness, - comppct_r, - CAST (SUM (CASE WHEN hzdepb_r > %s THEN %s ELSE hzdepb_r END - CASE WHEN hzdept_r < %s THEN %s ELSE hzdept_r END) over(partition by c.cokey) AS decimal (5,2)) AS sum_thickness, - CAST (ISNULL (%s , 0) AS decimal (5,2)) AS %s - INTO #main - FROM legend AS l - INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s - INNER JOIN component AS c ON c.mukey = mu.mukey - INNER JOIN chorizon AS ch ON ch.cokey=c.cokey AND hzname NOT LIKE '%%O%%' AND hzname NOT LIKE '%%r%%' - AND hzdepb_r > %s AND hzdept_r < %s - INNER JOIN chtexturegrp AS cht ON ch.chkey=cht.chkey WHERE cht.rvindicator = 'yes' AND ch.hzdept_r IS NOT NULL - AND - texture NOT LIKE '%%PM%%' and texture NOT LIKE '%%DOM' and texture NOT LIKE '%%MPT%%' and texture NOT LIKE '%%MUCK' and texture NOT LIKE '%%PEAT%%' and texture NOT LIKE '%%br%%' and texture NOT LIKE '%%wb%%' - ORDER BY areasymbol, musym, muname, mu.mukey, comppct_r DESC, cokey, hzdept_r, hzdepb_r - SELECT #main.areasymbol, #main.musym, #main.muname, #main.MUKEY, - #main.COKEY, #main.CHKEY, #main.compname, hzname, hzdept_r, hzdepb_r, hzdept_r_ADJ, hzdepb_r_ADJ, thickness, sum_thickness, %s, comppct_r, SUM_COMP_PCT, WEIGHTED_COMP_PCT , - SUM((thickness/sum_thickness ) * %s)over(partition by #main.COKEY)AS COMP_WEIGHTED_AVERAGE - INTO #comp_temp2 - FROM #main - INNER JOIN #comp_temp3 ON #comp_temp3.cokey=#main.cokey - ORDER BY #main.areasymbol, #main.musym, #main.muname, #main.MUKEY, comppct_r DESC, #main.COKEY, hzdept_r, hzdepb_r - SELECT #comp_temp2.MUKEY,#comp_temp2.COKEY, WEIGHTED_COMP_PCT * COMP_WEIGHTED_AVERAGE AS COMP_WEIGHTED_AVERAGE1 - INTO #last_step - FROM #comp_temp2 - GROUP BY #comp_temp2.MUKEY,#comp_temp2.COKEY, WEIGHTED_COMP_PCT, COMP_WEIGHTED_AVERAGE - SELECT areasymbol, musym, muname, - #kitchensink.mukey, #last_step.COKEY, - CAST (SUM (COMP_WEIGHTED_AVERAGE1) over(partition by #kitchensink.mukey) as decimal(5,2)) AS %s - INTO #last_step2 - FROM #last_step - RIGHT OUTER JOIN #kitchensink ON #kitchensink.mukey=#last_step.mukey - GROUP BY #kitchensink.areasymbol, #kitchensink.musym, #kitchensink.muname, #kitchensink.mukey, COMP_WEIGHTED_AVERAGE1, #last_step.COKEY - ORDER BY #kitchensink.areasymbol, #kitchensink.musym, #kitchensink.muname, #kitchensink.mukey - SELECT #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, - #last_step2.mukey, #last_step2.%s - FROM #last_step2 - LEFT OUTER JOIN #last_step ON #last_step.mukey=#last_step2.mukey - GROUP BY #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, #last_step2.%s - ORDER BY #last_step2.areasymbol, #last_step2.musym, #last_step2.muname, #last_step2.mukey, #last_step2.%s", - gsub("^(l|mu)\\.","\\1ks.",where_clause), gsub("^(l|mu)\\.","\\11.",where_clause), - tDep, tDep, bDep, bDep, bDep, bDep, tDep, tDep, bDep, bDep, tDep, tDep, - agg_property, agg_property, - where_clause, - tDep, bDep, - agg_property, agg_property, agg_property, agg_property, agg_property, agg_property), + # dominant component (numeric) (.dominant_component_numeric handles vector agg_property) + "DOMINANT COMPONENT (NUMERIC)" = .property_dominant_component_numeric(agg_property, top_depth, bottom_depth, where_clause), # dominant condition "DOMINANT CONDITION" = - sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, - (SELECT TOP 1 %s FROM mapunit - INNER JOIN component ON component.mukey=mapunit.mukey AND mapunit.mukey = mu.mukey - GROUP BY %s, comppct_r ORDER BY SUM(comppct_r) over(partition by %s) DESC) AS %s + sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, %s FROM legend AS l INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s INNER JOIN component AS c ON c.mukey = mu.mukey AND @@ -479,21 +495,22 @@ get_SDA_property <- ORDER BY c1.comppct_r DESC, c1.cokey) GROUP BY areasymbol, musym, muname, mu.mukey, c.cokey, compname, comppct_r ORDER BY areasymbol, musym, muname, mu.mukey, comppct_r DESC, c.cokey", - agg_property, agg_property, agg_property, agg_property, where_clause), - + paste0(sapply(agg_property, .property_dominant_condition_category), collapse = ", "), + where_clause), + # NO AGGREGATION (component properties) - "NONE" = sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, - c.compname AS compname, c.comppct_r AS comppct_r, c.cokey AS cokey, + "NONE" = sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, + c.compname AS compname, c.comppct_r AS comppct_r, c.cokey AS cokey, %s FROM legend AS l INNER JOIN mapunit AS mu ON mu.lkey = l.lkey AND %s INNER JOIN component AS c ON c.mukey = mu.mukey ORDER BY areasymbol, musym, muname, mu.mukey, c.comppct_r DESC, c.cokey", - paste0(sapply(agg_property, function(x) sprintf("c.%s AS %s", x, x)), collapse = ", "), + paste0(sapply(agg_property, function(x) sprintf("c.%s AS %s", x, x)), collapse = ", "), where_clause), - + # NO AGGREGATION (horizon properties) - "NONE_HORIZON" = sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, + "NONE_HORIZON" = sprintf("SELECT areasymbol, musym, muname, mu.mukey/1 AS mukey, c.cokey AS cokey, ch.chkey AS chkey, c.compname AS compname, c.comppct_r AS comppct_r, ch.hzdept_r AS hzdept_r, ch.hzdepb_r AS hzdepb_r, @@ -503,7 +520,7 @@ get_SDA_property <- INNER JOIN component AS c ON c.mukey = mu.mukey INNER JOIN chorizon AS ch ON ch.cokey = c.cokey ORDER BY areasymbol, musym, muname, mu.mukey, c.comppct_r DESC, c.cokey, hzdept_r", - paste0(sapply(agg_property, function(x) sprintf("ch.%s AS %s", x, x)), collapse = ", "), + paste0(sapply(agg_property, function(x) sprintf("ch.%s AS %s", x, x)), collapse = ", "), where_clause) ) diff --git a/man/get_SDA_interpretation.Rd b/man/get_SDA_interpretation.Rd index aa04f4b0..ac53ae26 100644 --- a/man/get_SDA_interpretation.Rd +++ b/man/get_SDA_interpretation.Rd @@ -8,17 +8,23 @@ get_SDA_interpretation( rulename, method = c("Dominant Component", "Dominant Condition", "Weighted Average", "None"), areasymbols = NULL, - mukeys = NULL + mukeys = NULL, + query_string = FALSE, + not_rated_value = NA_real_ ) } \arguments{ -\item{rulename}{rule name of interpretation (matching a \code{mrulename} in \code{cointerp} table)} +\item{rulename}{character vector of interpretation rule names (matching \code{mrulename} in \code{cointerp} table)} \item{method}{aggregation method. One of: "Dominant Component", "Dominant Condition", "Weighted Average", "None". If "None" is selected one row will be returned per component, otherwise one row will be returned per map unit.} \item{areasymbols}{vector of soil survey area symbols} \item{mukeys}{vector of map unit keys} + +\item{query_string}{Default: \code{FALSE}; if \code{TRUE} return a character string containing query that would be sent to SDA via \code{SDA_query}} + +\item{not_rated_value}{used where rating class is "Not Rated". Default: \code{NA_real}} } \value{ a data.frame @@ -658,6 +664,21 @@ Get map unit interpretations from Soil Data Access by rule name \item WMS - Surface Water Management, System } } +} +\examples{ +\donttest{ +if(requireNamespace("curl") & + curl::has_internet()) { + + # get two forestry interpretations for CA630 + get_SDA_interpretation(c("FOR - Potential Seedling Mortality", + "FOR - Road Suitability (Natural Surface)"), + method = "Dominant Condition", + areasymbols = "CA630") +} +} + + } \author{ Jason Nemecek, Chad Ferguson, Andrew Brown diff --git a/man/get_SDA_property.Rd b/man/get_SDA_property.Rd index 78ebef89..d8e3da9d 100644 --- a/man/get_SDA_property.Rd +++ b/man/get_SDA_property.Rd @@ -12,11 +12,12 @@ get_SDA_property( mukeys = NULL, top_depth = 0, bottom_depth = 200, - FUN = NULL + FUN = NULL, + query_string = FALSE ) } \arguments{ -\item{property}{a label or column name from property dictionary} +\item{property}{character vector of labels from property dictionary tables (see details) OR physical column names from \code{component} or \code{chorizon} table.} \item{method}{one of: "Dominant Component (Category)", "Weighted Average", "Min/Max", "Dominant Component (Numeric)", "Dominant Condition", or "None". If "None" is selected, the number of rows returned will depend on whether a component or horizon level property was selected, otherwise the result will be 1:1 with the number of map units.} @@ -28,7 +29,9 @@ get_SDA_property( \item{bottom_depth}{Default: \code{200} (centimeters); a numeric value for lower boundary (bottom depth) used only for method="weighted average" and "dominant component (numeric)"} -\item{FUN}{Optional: character representing SQL aggregation function either "MIN" or "MAX" for method="min/max"} +\item{FUN}{Optional: character representing SQL aggregation function either "MIN" or "MAX" used only for method="min/max"} + +\item{query_string}{Default: \code{FALSE}; if \code{TRUE} return a character string containing query that would be sent to SDA via \code{SDA_query}} } \value{ a data.frame with result @@ -37,7 +40,7 @@ a data.frame with result Get map unit properties from Soil Data Access } \details{ -The \code{property} argument refers to one of the property names or columns specified in the tables below. +The \code{property} argument refers to one of the property names or columns specified in the tables below. Note that \code{property} can be specified as either a character vector of labeled properties, such as \code{"Bulk Density 0.33 bar H2O - Rep Value"}, OR physical column names such as \code{"dbthirdbar_r"}. To get "low" and "high" values for a particular property, replace the \verb{_r} with \verb{_l} or \verb{_h} in the physical column name; for example \code{property = c("dbthirdbar_l","dbthirdbar_r","dbthirdbar_h")}. You can view exhaustive lists of component and component horizon level properties in the Soil Data Access \href{https://sdmdataaccess.sc.egov.usda.gov/documents/TablesAndColumnsReport.pdf}{"Tables and Columns Report"}. \subsection{Selected Component-level Properties}{\tabular{ll}{ \strong{Property (Component)} \tab \strong{Column} \cr Range Production - Favorable Year \tab rsprod_h \cr @@ -122,6 +125,22 @@ The \code{property} argument refers to one of the property names or columns spec } } +} +\examples{ + +\donttest{ +if(requireNamespace("curl") & + curl::has_internet()) { + + # get 1/3 bar bulk density [0,25] centimeter depth weighted average from dominant component + get_SDA_property(property = c("dbthirdbar_l","dbthirdbar_r","dbthirdbar_h"), + method = "Dominant Component (Numeric)", + areasymbols = "CA630", + top_depth = 0, + bottom_depth = 25) +} +} + } \author{ Jason Nemecek, Chad Ferguson, Andrew Brown diff --git a/misc/getSDA_vectorize.R b/misc/getSDA_vectorize.R new file mode 100644 index 00000000..540e9384 --- /dev/null +++ b/misc/getSDA_vectorize.R @@ -0,0 +1,26 @@ +library(soilDB) + +# WORKS (dominant condition) +get_SDA_property(c("taxsuborder", "taxorder", "tfact"), + method = "dominant condition", + areasymbols = "CA630") +# WORKS (min/max) +get_SDA_property(c("ksat_l", "ksat_r", "ksat_h"), + method = "min", + areasymbols = "CA630") + +# WORKS (weighted average) +get_SDA_property(c("ksat_l", "ksat_r", "ksat_h"), + method = "weighted average", + areasymbols = 'CA630') + +# WORKS (dominant component, numeric -- special case of weighted average) +q <- get_SDA_property( + c("ksat_l", "ksat_r", "ksat_h"), + method = "dominant component (numeric)", + areasymbols = 'CA630', + query_string = TRUE # this just returns the query instead of calling SDA_query +) +# cat(q) +res <- SDA_query(q) +res diff --git a/misc/soil-data-aggregation/get_SDA_SOD-tests.R b/misc/soil-data-aggregation/get_SDA_SOD-tests.R new file mode 100644 index 00000000..80f11745 --- /dev/null +++ b/misc/soil-data-aggregation/get_SDA_SOD-tests.R @@ -0,0 +1,40 @@ +library(soilDB) + +# select a single mukey to check that top and bottom depth and different methods produce stable results +keys <- c(463276) + +s1 <- get_SDA_property(property = 'Available Water Capacity - Rep Value', + method = 'Weighted Average', + mukeys = keys, + top_depth = 0, + bottom_depth = 200) +s2 <- get_SDA_property(property = 'Available Water Capacity - Rep Value', + method = 'Weighted Average', + mukeys = keys, + top_depth = 0, + bottom_depth = 1000) +testthat::expect_equal(s1, s2) +testthat::expect_equal(s1$awc_r, 0.13) + +s3 <- get_SDA_property(property = 'Available Water Capacity - Rep Value', + method = 'Dominant Component (Numeric)', + mukeys = keys, + top_depth = 0, + bottom_depth = 200) +s4 <- get_SDA_property(property = 'Available Water Capacity - Rep Value', + method = 'Dominant Component (Numeric)', + mukeys = keys, + top_depth = 0, + bottom_depth = 1000) +testthat::expect_equal(s3, s4) +testthat::expect_equal(s3$awc_r, 0.18) + +s5 <- get_SDA_interpretation(rulename = 'FOR - Mechanical Planting Suitability', + method = 'Weighted Average', + mukeys = keys) +testthat::expect_equal(s5$rating_FORMechanicalPlantingSuitability, 0.99) + +s6 <- get_SDA_interpretation(rulename = 'FOR - Mechanical Planting Suitability', + method = 'Dominant Component', + mukeys = keys) +testthat::expect_equal(s6$rating_FORMechanicalPlantingSuitability, 0.987) diff --git a/tests/testthat/test-SDA_interpretations.R b/tests/testthat/test-SDA_interpretations.R index 45152386..2cda9cb8 100644 --- a/tests/testthat/test-SDA_interpretations.R +++ b/tests/testthat/test-SDA_interpretations.R @@ -13,9 +13,10 @@ test_that("SDA interpretations (dominant component) works", { method = "Dominant Component", areasymbols = target_areas) expect_equal(nrow(res), target_area_rows) - res <- get_SDA_interpretation("FOR - Potential Seedling Mortality", + res <- get_SDA_interpretation(c("FOR - Potential Seedling Mortality", + "FOR - Road Suitability (Natural Surface)"), method = "Dominant Component", mukeys = target_mukeys) - expect_equal(sort(res$MUKEY), sort(target_mukeys)) + expect_equal(sort(res$mukey), sort(target_mukeys)) }) test_that("SDA interpretations (dominant condition) works", { @@ -28,9 +29,10 @@ test_that("SDA interpretations (dominant condition) works", { expect_equal(nrow(res), target_area_rows) - res <- get_SDA_interpretation("FOR - Potential Seedling Mortality", + res <- get_SDA_interpretation(c("FOR - Potential Seedling Mortality", + "FOR - Road Suitability (Natural Surface)"), method = "Dominant Condition", mukeys = target_mukeys) - expect_equal(sort(res$MUKEY), sort(target_mukeys)) + expect_equal(sort(res$mukey), sort(target_mukeys)) }) test_that("SDA interpretations (weighted average) works", { @@ -43,20 +45,22 @@ test_that("SDA interpretations (weighted average) works", { expect_equal(nrow(res), target_area_rows) - res <- get_SDA_interpretation("FOR - Potential Seedling Mortality", + res <- get_SDA_interpretation(c("FOR - Potential Seedling Mortality", + "FOR - Road Suitability (Natural Surface)"), method = "Weighted Average", mukeys = target_mukeys) - expect_equal(sort(res$MUKEY), sort(target_mukeys)) + expect_equal(sort(res$mukey), sort(target_mukeys)) }) test_that("SDA interpretations (no aggregation) works", { skip_if_offline() - + skip_on_cran() - - res <- get_SDA_interpretation("FOR - Potential Seedling Mortality", - method = "NONE", + + res <- get_SDA_interpretation(c("FOR - Potential Seedling Mortality", + "FOR - Road Suitability (Natural Surface)"), + method = "NONE", areasymbols = target_areas) expect_equal(nrow(res), target_area_rows_all) - - + + }) diff --git a/tests/testthat/test-SDA_properties.R b/tests/testthat/test-SDA_properties.R index c27818c1..b0186bef 100644 --- a/tests/testthat/test-SDA_properties.R +++ b/tests/testthat/test-SDA_properties.R @@ -16,7 +16,7 @@ test_that("SDA properties (dominant condition) works", { method = "Dominant Condition", areasymbols = target_areas)), target_area_rows) - expect_equal(get_SDA_property(property = "Taxonomic Suborder", + expect_equal(get_SDA_property(property = c("Taxonomic Suborder","Taxonomic Order"), method = "Dominant Condition", mukeys = target_mukeys)$mukey, target_mukeys) @@ -30,7 +30,7 @@ test_that("SDA properties (dominant component category) works", { method = "Dominant Component (Category)", areasymbols = target_areas)), target_area_rows) - expect_equal(get_SDA_property(property = "Taxonomic Suborder", + expect_equal(get_SDA_property(property = c("Taxonomic Suborder","Taxonomic Order"), method = "Dominant Component (Category)", mukeys = target_mukeys)$mukey, target_mukeys) }) @@ -49,7 +49,7 @@ test_that("SDA properties (dominant component numeric) works", { )), target_area_rows) expect_equal(get_SDA_property( - property = "Very Coarse Sand - Rep Value", + property = c("sandvc_l","sandvc_r","sandvc_h"), method = "Dominant Component (Numeric)", mukeys = target_mukeys, top_depth = 25, @@ -72,7 +72,7 @@ test_that("SDA properties (weighted average) works", { )), target_area_rows) expect_equal(get_SDA_property( - property = "Total Clay - Rep Value", + property = c("claytotal_l","claytotal_r","claytotal_h"), method = "Weighted Average", mukeys = target_mukeys, top_depth = 25, @@ -93,7 +93,7 @@ test_that("SDA properties (min/max) works", { )), target_area_rows) expect_equal(get_SDA_property( - property = "Saturated Hydraulic Conductivity - Rep Value", + property = c("ksat_l","ksat_r","ksat_h"), method = "Min/Max", mukeys = target_mukeys, FUN = "MIN" @@ -102,17 +102,17 @@ test_that("SDA properties (min/max) works", { test_that("SDA properties (no aggregation) works", { skip_if_offline() - + skip_on_cran() - + # return results 1:1 with component for component properties - expect_equal(nrow(get_SDA_property(property = "Taxonomic Suborder", + expect_equal(nrow(get_SDA_property(property = c('rsprod_l','rsprod_r','rsprod_h'), method = "NONE", areasymbols = target_areas)), target_area_rows_all) - - + + # return results 1:1 with chorizon for horizon properties (includes cokey) - expect_equal(nrow(get_SDA_property("Total Sand - Rep Value", - method = "NONE", + expect_equal(nrow(get_SDA_property(c('sandtotal_l','sandtotal_r','sandtotal_h'), + method = "NONE", areasymbols = target_areas)), target_area_rows_all_chorizon) })