From 1a05a8c881ae5934a812e90a11289c3bfe7a9a1d Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Tue, 1 Oct 2024 12:37:35 -0700 Subject: [PATCH] Add helpfile on NASIS Local Database sources; closes #356 --- R/database-sources.R | 17 +++++++++++++++++ R/fetchNASIS.R | 22 +++++++++++----------- R/get_RMF_from_NASIS_db.R | 2 +- R/get_soilseries_from_NASIS.R | 2 +- R/uncode.R | 6 +++--- man/NASISChoiceList.Rd | 2 +- man/NASISLocalDatabase.Rd | 18 ++++++++++++++++++ man/fetchNASIS.Rd | 22 +++++++++++----------- man/get_NASIS_metadata.Rd | 2 +- man/get_NASIS_table_metadata.Rd | 2 +- man/get_RMF_from_NASIS_db.Rd | 2 +- man/get_soilseries_from_NASIS.Rd | 2 +- 12 files changed, 67 insertions(+), 32 deletions(-) create mode 100644 R/database-sources.R create mode 100644 man/NASISLocalDatabase.Rd diff --git a/R/database-sources.R b/R/database-sources.R new file mode 100644 index 00000000..779b27da --- /dev/null +++ b/R/database-sources.R @@ -0,0 +1,17 @@ +#' NASIS Local Database +#' +#' This is a guide on using databases that follow the NASIS schema. Most of the time, users are querying an instance of the Microsoft SQL Server NASIS local transactional database running on their computer. It is possible to create file-based "snapshots" of a local instance of the NASIS database using SQLite. See [createStaticNASIS()] for details. +#' +#' # Working With Coded Values and Decoding +#' +#' Some values (choice lists) in NASIS are conventionally stored using numeric codes. The codes are defined by "domain" and allow for both "names" and "labels" as well as other descriptive information to be provided for each choice list element. See [get_NASIS_column_metadata()] for details. +#' +#' Many soilDB functions call the function [uncode()] internally to handle conversion to human-readable values using official NASIS domains. 
If writing queries directly against the database source, such as a connection created with [NASIS()] or query run with `dbQueryNASIS()`, you can call `uncode()` on the _data.frame_ result of your query. Conversion of internal values to choice list names is based on domains associated with result column names. +#' +#' When using a custom SQLite database, sometimes values in the database are delivered pre-decoded to make the database more directly usable. An example of this would be the Kellogg Soil Survey Laboratory morphologic database, the NASIS data corresponding to the laboratory analyses available through the \link[=fetchLDM]{Lab Data Mart (LDM)}. +#' +#' To avoid issues with offsets between internal storage value and external readable value (for data such as farmland classification or Munsell color value and chroma), you should not call `uncode()` multiple times. Also, you can disable the "decoding" behavior made internally in soilDB functions by setting `options(soilDB.NASIS.skip_uncode = TRUE)`. +#' +#' +#' @name NASISLocalDatabase +NULL diff --git a/R/fetchNASIS.R b/R/fetchNASIS.R index e97037dd..348615a4 100644 --- a/R/fetchNASIS.R +++ b/R/fetchNASIS.R @@ -35,25 +35,25 @@ #' get_phfmp_from_NASIS_db #' get_concentrations_from_NASIS_db #' -#' @param from determines what objects should fetched? Default: `'pedons'`. Alternately, `'components'`, or `'pedon_report'`. -#' @param url string specifying the url for the NASIS pedon_report (default: +#' @param from Determines what objects should be fetched. Default: `'pedons'`. Alternately, `'components'`, or `'pedon_report'`. 
+#' @param url String specifying the url for the NASIS pedon_report (default: #' `NULL`) -#' @param SS fetch data from the currently loaded selected set in NASIS or from -#' the entire local database (default: `TRUE`) -#' @param rmHzErrors should pedons with horizon depth errors be removed from +#' @param SS Fetch data from the currently loaded selected set in NASIS or from +#' the entire local database (default: `TRUE`) +#' @param rmHzErrors Should pedons with horizon depth errors be removed from #' the results? (default: `FALSE`) -#' @param nullFragsAreZero should fragment volumes of `NULL` be interpreted as `0`? +#' @param nullFragsAreZero Should fragment volumes of `NULL` be interpreted as `0`? #' (default: `TRUE`), see details #' @param soilColorState Used only for `from = 'pedons'`; which colors should be used to generate the convenience field `soil_color`? (`'moist'` or `'dry'`) -#' @param mixColors should mixed colors be calculated (Default: `TRUE`) where multiple colors are populated for the same moisture state in a horizon? `FALSE` takes the dominant color for each horizon moist/dry state. -#' @param lab should the `phlabresults` child table be fetched with site/pedon/horizon data (default: `FALSE`) -#' @param fill include pedon or component records without horizon data in result? (default: `FALSE`) +#' @param mixColors Should mixed colors be calculated (Default: `TRUE`) where multiple colors are populated for the same moisture state in a horizon? `FALSE` takes the dominant color for each horizon moist/dry state. +#' @param lab Should the `phlabresults` child table be fetched with site/pedon/horizon data (default: `FALSE`) +#' @param fill Include pedon or component records without horizon data in result? (default: `FALSE`) #' @param dropAdditional Used only for `from='components'` with `duplicates = TRUE`. Prevent "duplication" of `mustatus == "additional"` mapunits? 
Default: `TRUE` #' @param dropNonRepresentative Used only for `from='components'` with `duplicates = TRUE`. Prevent "duplication" of non-representative data mapunits? Default: `TRUE` #' @param duplicates Used only for `from='components'`. Duplicate components for all instances of use (i.e. one for each legend data mapunit is used on; optionally for additional mapunits, and/or non-representative data mapunits?). This will include columns from `get_component_correlation_data_from_NASIS_db()` that identify which legend(s) a component is used on. #' @param stringsAsFactors deprecated -#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: `NULL` -#' @return a `SoilProfileCollection` object +#' @param dsn Optional: path or _DBIConnection_ to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: `NULL` +#' @return A `SoilProfileCollection` object #' @seealso `get_component_data_from_NASIS()` #' @author D. E. Beaudette, J. M. Skovlin, S.M. Roecker, A.G. Brown #' diff --git a/R/get_RMF_from_NASIS_db.R b/R/get_RMF_from_NASIS_db.R index 662408e1..c8b5ca46 100644 --- a/R/get_RMF_from_NASIS_db.R +++ b/R/get_RMF_from_NASIS_db.R @@ -6,7 +6,7 @@ #' #' @param SS logical, limit query to the selected set #' -#' @param dsn optional path to local SQLite database containing NASIS table structure; default: `NULL` +#' @param dsn optional path or _DBIConnection_ to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: `NULL` #' #' @return a `list` with two `data.frame` objects: #' * `RMF`: contents of "phrdxfeatures" table, often >1 row per horizon diff --git a/R/get_soilseries_from_NASIS.R b/R/get_soilseries_from_NASIS.R index 8c79a8cb..8b6a20a1 100644 --- a/R/get_soilseries_from_NASIS.R +++ b/R/get_soilseries_from_NASIS.R @@ -4,7 +4,7 @@ #' from the local NASIS database (all series) or via web report (named series #' only). 
#' @param stringsAsFactors deprecated -#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: `NULL` +#' @param dsn Optional: path or _DBIConnection_ to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: `NULL` #' @param delimiter _character_. Used to collapse `taxminalogy` records where multiple values are used to describe strongly contrasting control sections. Default `" over "` creates combination mineralogy classes as they would be used in the family name. #' @param SS _logical_. Fetch data from the currently loaded selected set in NASIS or from the entire local database (default: `FALSE`; this is to allow for queries against the full Series Classification database as default) #' @return A \code{data.frame} diff --git a/R/uncode.R b/R/uncode.R index 2c3b8d66..24271fb6 100644 --- a/R/uncode.R +++ b/R/uncode.R @@ -179,7 +179,7 @@ NASISDomainsAsFactor <- function(x = NULL) { #' #' These data are derived from the MetadataDomainDetail, MetadataDomainMaster, and MetadataTableColumn tables and help with mapping between values stored in the NASIS database and human-readable values. The human-readable values align with the values returned in public facing interfaces such as SSURGO via Soil Data Access and NASIS Web Reports. The data in these tables can also be used to create _ordered_ factors where options for levels of a particular data element follow a logical `ChoiceSequence`. #' -#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: `NULL` +#' @param dsn Optional: path or _DBIConnection_ to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: `NULL` #' @param include_description Include "ChoiceDescription" column? Default: `FALSE` #' @details If a local NASIS instance is set up, and this is the first time `get_NASIS_metadata()` has been called, the metadata will be obtained from the NASIS local database. 
Subsequent runs in the same session will use a copy of the data object `NASIS.metadata` cached in `soilDB.env` which can be accessed with `get_soilDB_env()$NASIS.metadata`. #' @@ -268,7 +268,7 @@ get_NASIS_column_metadata <- function(x, #' @param droplevels Drop unused factor levels? Default: `TRUE` (used only when `factor=TRUE`) #' @param ordered Should the result be an ordered factor? Default: `TRUE` (use _only_ if `DomainRanked` is true for all choices) #' @param simplify Should list result with length 1 be reduced to a single vector? Default: `TRUE` -#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: NULL +#' @param dsn Optional: path or _DBIConnection_ to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: NULL #' @return A list of "choices" based on the input `x` that have been converted to a consistent target set of levels (specified by `choice`) via NASIS 7 metadata. #' #' When `factor=TRUE` the result is a factor, possibly ordered when `ordered=TRUE` and the target domain is a "ranked" domain (i.e. `ChoiceSequence` has logical meaning). @@ -363,7 +363,7 @@ NASISChoiceList <- function(x = NULL, #' @param what.table Column to match `table` against. Default: `TablePhysicalName`. #' @param what.column Column to match `column` against. Default: `ColumnPhysicalName`. #' @param query_string Default: `FALSE`; if `TRUE` return a character containing query that would be sent to NASIS. -#' @param dsn Optional: path to local SQLite database containing NASIS table structure; default: `NULL` +#' @param dsn Optional: path or _DBIConnection_ to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: `NULL` #' @details For NASIS choice lists based on domain and column names see `get_NASIS_metadata()` and `NASISChoiceList()`. 
This function (`get_NASIS_table_metadata()`) is intended for higher-level description of the expected contents of a NASIS database instance, rather than the codes/specific values used within columns. #' @seealso `get_NASIS_metadata()` `NASISChoiceList()` `uncode()` `code()` #' @return a `data.frame` diff --git a/man/NASISChoiceList.Rd b/man/NASISChoiceList.Rd index 77c75d0e..83a8908a 100644 --- a/man/NASISChoiceList.Rd +++ b/man/NASISChoiceList.Rd @@ -36,7 +36,7 @@ NASISChoiceList( \item{simplify}{Should list result with length 1 be reduced to a single vector? Default: \code{TRUE}} -\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: NULL} +\item{dsn}{Optional: path or \emph{DBIConnection} to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: NULL} } \value{ A list of "choices" based on the input \code{x} that have been converted to a consistent target set of levels (specified by \code{choice}) via NASIS 7 metadata. diff --git a/man/NASISLocalDatabase.Rd b/man/NASISLocalDatabase.Rd new file mode 100644 index 00000000..0043d498 --- /dev/null +++ b/man/NASISLocalDatabase.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database-sources.R +\name{NASISLocalDatabase} +\alias{NASISLocalDatabase} +\title{NASIS Local Database} +\description{ +This is a guide on using databases that follow the NASIS schema. Most of the time, users are querying an instance of the Microsoft SQL Server NASIS local transactional database running on their computer. It is possible to create file-based "snapshots" of a local instance of the NASIS database using SQLite. See \code{\link[=createStaticNASIS]{createStaticNASIS()}} for details. +} +\section{Working With Coded Values and Decoding}{ +Some values (choice lists) in NASIS are conventionally stored using numeric codes. 
The codes are defined by "domain" and allow for both "names" and "labels" as well as other descriptive information to be provided for each choice list element. See \code{\link[=get_NASIS_column_metadata]{get_NASIS_column_metadata()}} for details. + +Many soilDB functions call the function \code{\link[=uncode]{uncode()}} internally to handle conversion to human-readable values using official NASIS domains. If writing queries directly against the database source, such as a connection created with \code{\link[=NASIS]{NASIS()}} or query run with \code{dbQueryNASIS()}, you can call \code{uncode()} on the \emph{data.frame} result of your query. Conversion of internal values to choice list names is based on domains associated with result column names. + +When using a custom SQLite database, sometimes values in the database are delivered pre-decoded to make the database more directly usable. An example of this would be the Kellogg Soil Survey Laboratory morphologic database, the NASIS data corresponding to the laboratory analyses available through the \link[=fetchLDM]{Lab Data Mart (LDM)}. + +To avoid issues with offsets between internal storage value and external readable value (for data such as farmland classification or Munsell color value and chroma), you should not call \code{uncode()} multiple times. Also, you can disable the "decoding" behavior made internally in soilDB functions by setting \code{options(soilDB.NASIS.skip_uncode = TRUE)}. +} + diff --git a/man/fetchNASIS.Rd b/man/fetchNASIS.Rd index 1d66dba9..10bd364c 100644 --- a/man/fetchNASIS.Rd +++ b/man/fetchNASIS.Rd @@ -34,27 +34,27 @@ get_concentrations_from_NASIS_db( get_phfmp_from_NASIS_db(SS = TRUE, stringsAsFactors = NULL, dsn = NULL) } \arguments{ -\item{from}{determines what objects should fetched? Default: \code{'pedons'}. Alternately, \code{'components'}, or \code{'pedon_report'}.} +\item{from}{Determines what objects should be fetched. Default: \code{'pedons'}. 
Alternately, \code{'components'}, or \code{'pedon_report'}.} -\item{url}{string specifying the url for the NASIS pedon_report (default: +\item{url}{String specifying the url for the NASIS pedon_report (default: \code{NULL})} -\item{SS}{fetch data from the currently loaded selected set in NASIS or from -the entire local database (default: \code{TRUE})} +\item{SS}{Fetch data from the currently loaded selected set in NASIS or from +the entire local database (default: \code{TRUE})} -\item{rmHzErrors}{should pedons with horizon depth errors be removed from +\item{rmHzErrors}{Should pedons with horizon depth errors be removed from the results? (default: \code{FALSE})} -\item{nullFragsAreZero}{should fragment volumes of \code{NULL} be interpreted as \code{0}? +\item{nullFragsAreZero}{Should fragment volumes of \code{NULL} be interpreted as \code{0}? (default: \code{TRUE}), see details} \item{soilColorState}{Used only for \code{from = 'pedons'}; which colors should be used to generate the convenience field \code{soil_color}? (\code{'moist'} or \code{'dry'})} -\item{mixColors}{should mixed colors be calculated (Default: \code{TRUE}) where multiple colors are populated for the same moisture state in a horizon? \code{FALSE} takes the dominant color for each horizon moist/dry state.} +\item{mixColors}{Should mixed colors be calculated (Default: \code{TRUE}) where multiple colors are populated for the same moisture state in a horizon? \code{FALSE} takes the dominant color for each horizon moist/dry state.} -\item{lab}{should the \code{phlabresults} child table be fetched with site/pedon/horizon data (default: \code{FALSE})} +\item{lab}{Should the \code{phlabresults} child table be fetched with site/pedon/horizon data (default: \code{FALSE})} -\item{fill}{include pedon or component records without horizon data in result? (default: \code{FALSE})} +\item{fill}{Include pedon or component records without horizon data in result? 
(default: \code{FALSE})} \item{dropAdditional}{Used only for \code{from='components'} with \code{duplicates = TRUE}. Prevent "duplication" of \code{mustatus == "additional"} mapunits? Default: \code{TRUE}} @@ -64,10 +64,10 @@ the results? (default: \code{FALSE})} \item{stringsAsFactors}{deprecated} -\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: \code{NULL}} +\item{dsn}{Optional: path or \emph{DBIConnection} to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: \code{NULL}} } \value{ -a \code{SoilProfileCollection} object +A \code{SoilProfileCollection} object } \description{ Fetch commonly used site/pedon/horizon or mapunit component data from NASIS, diff --git a/man/get_NASIS_metadata.Rd b/man/get_NASIS_metadata.Rd index bfc34749..142e80c0 100644 --- a/man/get_NASIS_metadata.Rd +++ b/man/get_NASIS_metadata.Rd @@ -15,7 +15,7 @@ get_NASIS_column_metadata( ) } \arguments{ -\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: \code{NULL}} +\item{dsn}{Optional: path or \emph{DBIConnection} to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: \code{NULL}} \item{include_description}{Include "ChoiceDescription" column? 
Default: \code{FALSE}} diff --git a/man/get_NASIS_table_metadata.Rd b/man/get_NASIS_table_metadata.Rd index 27f0bae1..077109fa 100644 --- a/man/get_NASIS_table_metadata.Rd +++ b/man/get_NASIS_table_metadata.Rd @@ -24,7 +24,7 @@ get_NASIS_table_metadata( \item{query_string}{Default: \code{FALSE}; if \code{TRUE} return a character containing query that would be sent to NASIS.} -\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: \code{NULL}} +\item{dsn}{Optional: path or \emph{DBIConnection} to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: \code{NULL}} } \value{ a \code{data.frame} diff --git a/man/get_RMF_from_NASIS_db.Rd b/man/get_RMF_from_NASIS_db.Rd index 5e5e6433..83981a61 100644 --- a/man/get_RMF_from_NASIS_db.Rd +++ b/man/get_RMF_from_NASIS_db.Rd @@ -9,7 +9,7 @@ get_RMF_from_NASIS_db(SS = TRUE, dsn = NULL) \arguments{ \item{SS}{logical, limit query to the selected set} -\item{dsn}{optional path to local SQLite database containing NASIS table structure; default: \code{NULL}} +\item{dsn}{optional path or \emph{DBIConnection} to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: \code{NULL}} } \value{ a \code{list} with two \code{data.frame} objects: diff --git a/man/get_soilseries_from_NASIS.Rd b/man/get_soilseries_from_NASIS.Rd index 495b3fd9..ecc27d79 100644 --- a/man/get_soilseries_from_NASIS.Rd +++ b/man/get_soilseries_from_NASIS.Rd @@ -25,7 +25,7 @@ get_competing_soilseries_from_NASIS( \arguments{ \item{stringsAsFactors}{deprecated} -\item{dsn}{Optional: path to local SQLite database containing NASIS table structure; default: \code{NULL}} +\item{dsn}{Optional: path or \emph{DBIConnection} to \link[=NASISLocalDatabase]{local database containing NASIS table structure}; default: \code{NULL}} \item{delimiter}{\emph{character}. 
Used to collapse \code{taxminalogy} records where multiple values are used to describe strongly contrasting control sections. Default \code{" over "} creates combination mineralogy classes as they would be used in the family name.}