diff --git a/R/get_ecosite_history_from_NASIS_db.R b/R/get_ecosite_history_from_NASIS_db.R index 9796a876..cf2d3456 100644 --- a/R/get_ecosite_history_from_NASIS_db.R +++ b/R/get_ecosite_history_from_NASIS_db.R @@ -4,13 +4,14 @@ #' #' @param best Should the "best" ecological site correlation be chosen? Creates field called `es_selection_method` with `"most recent"` or `"least missing data"` for resolving many:1 relationships in site history. #' @param SS Use selected set? Default: `TRUE` +#' @param es_classifier Optional: character. Vector of classifier names (and corresponding records) to retain in final result. #' @param dsn Path to SQLite data source, or a `DBIConnection` to database with NASIS schema. #' #' @seealso [get_extended_data_from_NASIS_db()] #' #' @return a `data.frame`, or `NULL` on error #' @export -get_ecosite_history_from_NASIS_db <- function(best = TRUE, SS = TRUE, dsn = NULL) { +get_ecosite_history_from_NASIS_db <- function(best = TRUE, SS = TRUE, es_classifier = NULL, dsn = NULL) { .SD <- NULL @@ -39,5 +40,5 @@ get_ecosite_history_from_NASIS_db <- function(best = TRUE, SS = TRUE, dsn = NULL } # load "best" siteecositehistory records: creates column 'es_selection_method' w/ "most recent" or "least missing data" - as.data.frame(data.table::as.data.table(ecositehistory)[, .pickBestEcosite(.SD), by = list(siteiid = ecositehistory$siteiid)]) + as.data.frame(data.table::as.data.table(ecositehistory)[, .pickBestEcosite(.SD, es_classifier = es_classifier), by = list(siteiid = ecositehistory$siteiid)]) } diff --git a/R/utils.R b/R/utils.R index 0aeee319..d6908f4d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -103,13 +103,17 @@ ## TODO: this may need some review ## try and pick the best possible ecosite record -.pickBestEcosite <- function(d) { +.pickBestEcosite <- function(d, es_classifier = NULL) { + if (!is.null(es_classifier)) { + d <- d[which(d$es_classifier %in% es_classifier),] + } + # add a method field d$es_selection_method <- NA_character_ # try to get the most recent: - d.order <- order(d$ecositecorrdate, decreasing=TRUE) + d.order <- order(d$ecositecorrdate, decreasing = TRUE) # if there are multiple (unique) dates, return the most recent if (length(unique(d$ecositecorrdate)) > 1) { diff --git a/man/get_ecosite_history_from_NASIS_db.Rd b/man/get_ecosite_history_from_NASIS_db.Rd index 649d70df..b783ddbe 100644 --- a/man/get_ecosite_history_from_NASIS_db.Rd +++ b/man/get_ecosite_history_from_NASIS_db.Rd @@ -4,13 +4,20 @@ \alias{get_ecosite_history_from_NASIS_db} \title{Get Site Ecological Site History} \usage{ -get_ecosite_history_from_NASIS_db(best = TRUE, SS = TRUE, dsn = NULL) +get_ecosite_history_from_NASIS_db( + best = TRUE, + SS = TRUE, + es_classifier = NULL, + dsn = NULL +) } \arguments{ \item{best}{Should the "best" ecological site correlation be chosen? Creates field called \code{es_selection_method} with \code{"most recent"} or \code{"least missing data"} for resolving many:1 relationships in site history.} \item{SS}{Use selected set? Default: \code{TRUE}} +\item{es_classifier}{Optional: character. Vector of classifier names (and corresponding records) to retain in final result.} + \item{dsn}{Path to SQLite data source, or a \code{DBIConnection} to database with NASIS schema.} } \value{