From 8bbdd23187ef737b173500d60eaec960ba43def0 Mon Sep 17 00:00:00 2001
From: "Andrew G. Brown" <andrew.g.brown@usda.gov>
Date: Thu, 16 Nov 2023 09:44:20 -0800
Subject: [PATCH] `fetchLDM`: add custom `WHERE` clause argument

---
 DESCRIPTION     |  2 +-
 NEWS.md         |  5 ++++-
 R/fetchLDM.R    | 12 +++++++++---
 man/fetchLDM.Rd |  3 +++
 man/fetchOSD.Rd |  1 +
 5 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 04c7f5aa..7aa414ee 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: soilDB
 Type: Package
 Title: Soil Database Interface
-Version: 2.7.9.9000
+Version: 2.7.10
 Authors@R: c(person(given="Dylan", family="Beaudette", role = c("aut"), email = "dylan.beaudette@usda.gov"),
              person(given="Jay", family="Skovlin", role = c("aut")), 
              person(given="Stephen", family="Roecker", role = c("aut")), 
diff --git a/NEWS.md b/NEWS.md
index 7bfdf094..5cbfbade 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,5 @@
-# soilDB 2.7.9.9000 (2023-10-04)
+# soilDB 2.7.10 (2023-11-16)
+
  - `fetchSDA_spatial()` gains `geom.src="mlrapolygon"` for obtaining Major Land Resource Area (MLRA) polygon boundaries. When using this geometry source `x` is a vector of `MLRARSYM` (MLRA Symbols).
  
    - The geometry source is the MLRA Geographic Database v5.2 (2022) which is not (yet) part of Soil Data Access. Instead of SDA, GDAL utilities are used to read a zipped ESRI Shapefile from a remote URL: <https://www.nrcs.usda.gov/sites/default/files/2022-10/MLRA_52_2022.zip>. Therefore, most additional `fetchSDA_spatial()` arguments are _not_ currently supported for the MLRA geometry source. In the future a `mlrapolygon` table may be added to SDA (analogous to  `mupolygon` and `sapolygon`), and the function will be updated accordingly at that time.
@@ -7,6 +8,8 @@
 
  - `get_SDA_coecoclass()` gains `method="all"` for aggregating information about ecological sites and related components. The method performs a condition-based aggregation for each ecological site condition in the map unit, producing a "wide" data.frame result with as many columns as needed to portray all site conditions.
 
+ - `fetchLDM()` gains new argument `WHERE` for supplying a custom SQL `WHERE` clause (given without the leading `WHERE` keyword) for selecting sites of interest. For example: `fetchLDM(WHERE = "CASE WHEN corr_name IS NOT NULL THEN LOWER(corr_name) ELSE LOWER(samp_name) END = 'musick'")`
+ 
 # soilDB 2.7.9 (2023-09-01)
  
  - Added new `method` options for `fetchSDA_spatial()`. Aggregation grouping is controlled by the `by.col` argument. This works for mapunit and survey area polygon geometries, aggregating all polygons in the group for each `mukey`, `nationalmusym`, `lkey`, or `areasymbol` extent.
diff --git a/R/fetchLDM.R b/R/fetchLDM.R
index 09f15ac7..a31b8bcb 100644
--- a/R/fetchLDM.R
+++ b/R/fetchLDM.R
@@ -9,6 +9,7 @@
 #' @param what A single column name from tables: `lab_combine_nasis_ncss`, `lab_webmap`, `lab_site`, `lab_pedon` or `lab_area`
 #' @param bycol A single column name from `lab_layer` used for processing chunks; default: `"pedon_key"`
 #' @param tables A vector of table names; Default is `"lab_physical_properties"`, `"lab_chemical_properties"`, `"lab_calculations_including_estimates_and_default_values"`, and `"lab_rosetta_Key"`. May also include one or more of: `"lab_mir"`, `"lab_mineralogy_glass_count"`, `"lab_major_and_trace_elements_and_oxides"`, `"lab_xray_and_thermal"` but it will be necessary to select appropriate `prep_code` and `analyzed_size_frac` for your analysis (see _Details_).
+#' @param WHERE character. A custom SQL WHERE clause (supplied without the leading `WHERE` keyword), which overrides `x`, `what`, and `bycol`. For example: `CASE WHEN corr_name IS NOT NULL THEN LOWER(corr_name) ELSE LOWER(samp_name) END = 'musick'`
 #' @param chunk.size Number of pedons per chunk (for queries that may exceed `maxJsonLength`)
 #' @param ntries Number of tries (times to halve `chunk.size`) before returning `NULL`; default `3`
 #' @param layer_type Default: `"horizon"`, `"layer"`, and `"reporting layer"`
@@ -54,6 +55,7 @@ fetchLDM <- function(x = NULL,
              "lab_rosetta_Key"
              # "lab_mir"
              ),
+           WHERE = NULL,
            chunk.size = 1000,
            ntries = 3,
            layer_type = c("horizon","layer","reporting layer"),
@@ -122,6 +124,10 @@ fetchLDM <- function(x = NULL,
   # TODO: set up arbitrary area queries by putting area table into groups:
   #       country, state, county, mlra, ssa, npark, nforest
 
+  if (!is.null(x) && (missing(WHERE) || is.null(WHERE))) {
+    WHERE <- sprintf("LOWER(%s) IN %s", what, format_SQL_in_statement(tolower(x)))
+  } 
+  
   # get site/pedon/area information
   site_query <- paste0(
     "SELECT * FROM lab_combine_nasis_ncss
@@ -131,7 +137,7 @@ fetchLDM <- function(x = NULL,
                   lab_combine_nasis_ncss.site_key = lab_site.site_key
               LEFT JOIN lab_pedon ON
                   lab_combine_nasis_ncss.site_key = lab_pedon.site_key ", 
-            ifelse(is.null(x), "", sprintf("WHERE LOWER(%s) IN %s", what, format_SQL_in_statement(tolower(x)))))
+            ifelse(is.null(x) && is.null(WHERE), "", paste("WHERE", WHERE)))
 
   if (inherits(con, 'DBIConnection')) {
     # query con using (modified) site_query
@@ -143,10 +149,10 @@ fetchLDM <- function(x = NULL,
   } else {
     # the lab_area table allows for overlap with many different area types
     # for now we only offer the "ssa" (soil survey area) area_type 
-    site_query_ssaarea <- gsub("WHERE LOWER", 
+    site_query_ssaarea <- gsub("WHERE", 
                        "LEFT JOIN lab_area ON
                        lab_combine_nasis_ncss.ssa_key = lab_area.area_key
-                       WHERE LOWER", site_query)
+                       WHERE", site_query)
     sites <- suppressMessages(SDA_query(site_query_ssaarea))
   }
 
diff --git a/man/fetchLDM.Rd b/man/fetchLDM.Rd
index f454bd40..2bb96071 100644
--- a/man/fetchLDM.Rd
+++ b/man/fetchLDM.Rd
@@ -10,6 +10,7 @@ fetchLDM(
   bycol = "pedon_key",
   tables = c("lab_physical_properties", "lab_chemical_properties",
     "lab_calculations_including_estimates_and_default_values", "lab_rosetta_Key"),
+  WHERE = NULL,
   chunk.size = 1000,
   ntries = 3,
   layer_type = c("horizon", "layer", "reporting layer"),
@@ -27,6 +28,8 @@ fetchLDM(
 
 \item{tables}{A vector of table names; Default is \code{"lab_physical_properties"}, \code{"lab_chemical_properties"}, \code{"lab_calculations_including_estimates_and_default_values"}, and \code{"lab_rosetta_Key"}. May also include one or more of: \code{"lab_mir"}, \code{"lab_mineralogy_glass_count"}, \code{"lab_major_and_trace_elements_and_oxides"}, \code{"lab_xray_and_thermal"} but it will be necessary to select appropriate \code{prep_code} and \code{analyzed_size_frac} for your analysis (see \emph{Details}).}
 
+\item{WHERE}{character. A custom SQL WHERE clause (supplied without the leading \code{WHERE} keyword), which overrides \code{x}, \code{what}, and \code{bycol}. For example: \verb{CASE WHEN corr_name IS NOT NULL THEN LOWER(corr_name) ELSE LOWER(samp_name) END = 'musick'}}
+
 \item{chunk.size}{Number of pedons per chunk (for queries that may exceed \code{maxJsonLength})}
 
 \item{ntries}{Number of tries (times to halve \code{chunk.size}) before returning \code{NULL}; default \code{3}}
diff --git a/man/fetchOSD.Rd b/man/fetchOSD.Rd
index 21706209..b8ea68ce 100644
--- a/man/fetchOSD.Rd
+++ b/man/fetchOSD.Rd
@@ -47,6 +47,7 @@ The standard set of "site" and "horizon" data are returned as a \code{SoilProfil
 \item{pmkind}{empirical probabilities for parent material kind, derived from the current SSURGO snapshot}
 \item{pmorigin}{empirical probabilities for parent material origin, derived from the current SSURGO snapshot}
 \item{mlra}{empirical MLRA membership values, derived from the current SSURGO snapshot}
+\item{ecoclassid}{area cross-tabulation of ecoclassid by soil series name, derived from the current SSURGO snapshot, major components only}
 \item{climate}{experimental climate summaries from PRISM stack (CONUS only)}
 
 \item{NCCPI}{select quantiles of NCCPI and Irrigated NCCPI, derived from the current SSURGO snapshot}