From 712db07495b844717bf7d1566cc690c2635d173c Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Wed, 1 Feb 2023 10:40:06 -0800 Subject: [PATCH] getTaxonAtLevel: allow multiple `level` values (returns data.frame); for #42 --- NEWS.md | 3 +- R/getTaxonAtLevel.R | 67 +++++++++++++++++++++++++----------------- man/getTaxonAtLevel.Rd | 10 +++++-- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/NEWS.md b/NEWS.md index cb446c6..7b4ec59 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ -# SoilTaxonomy 0.2.3 (2023-01-18) +# SoilTaxonomy 0.2.3 (2023-02-01) - Fix unintended case-sensitivity of `FormativeElements()`; thanks to Shawn Salley (@swsalley) - Fix for `extractSMR()` via fix for `FormativeElements()` applied at multiple levels (affects taxa above subgroup level) - Add `level_hierarchy()` a helper function for creating ordered factors related to the levels: "order", "suborder", "greatgroup", "subgroup", "family". - Taxon code lookup tables are now cached in the package environment (`SoilTaxonomy.env`) after the first time they are used for a moderate boost in performance in scenarios calling `taxon_code_to_taxon()` or `taxon_to_taxon_code()` many times + - `getTaxonAtLevel()` now supports multiple `level` values. In this case, a data.frame with column for each `level` is returned. Thanks to Meyer Bohn (@MollicMeyer) in #42 for suggestion. # SoilTaxonomy 0.2.2 (2022-10-01) * The order of dataset `ST` is now based on the full subgroup level code (https://github.com/ncss-tech/SoilTaxonomy/issues/35). Partial codes corresponding to each level of the hierarchy are also included in columns: `order_code`, `suborder_code`, `greatgroup_code` and `subgroup_code`. diff --git a/R/getTaxonAtLevel.R b/R/getTaxonAtLevel.R index 941e543..c576105 100644 --- a/R/getTaxonAtLevel.R +++ b/R/getTaxonAtLevel.R @@ -2,48 +2,61 @@ #' #' @param x A character vector containing subgroup-level taxonomic names #' @param level one of `c("order","suborder","greatgroup","subgroup")` +#' @param simplify Return a vector when `level` has length `1`? Default: `TRUE`. Otherwise, a data.frame is returned. +#' +#' @return A named character vector of taxa at specified level, where names are the internal Soil Taxonomy letter codes. When `length(level) > 1`? a data.frame is returned with column names for each `level`. +#' #' -#' @return A named character vector of taxa at specified level, where names are the internal Soil Taxonomy letter codes. -#' #' @export #' #' @examples -#' -#' # default gets the soil order +#' +#' # default gets the soil order #' getTaxonAtLevel(c("typic haplargids", "typic glacistels")) #, level = "order") -#' +#' #' # specify alternate levels #' getTaxonAtLevel("humic haploxerands", level = "greatgroup") -#' +#' #' # can't get subgroup (child) from great group (parent) #' getTaxonAtLevel("udifolists", level = "subgroup") -#' +#' #' # but can do parents of children #' getTaxonAtLevel("udifolists", level = "suborder") -#' -getTaxonAtLevel <- function(x, level = c("order","suborder","greatgroup","subgroup")) { - - level.names <- c("order","suborder","greatgroup","subgroup") - - level = match.arg(level, choices = level.names) +#' +#' # specify multiple levels (returns a list element for each level) +#' getTaxonAtLevel("hapludolls", c("order", "suborder", "greatgroup", "subgroup")) +getTaxonAtLevel <- function(x, level = "order", simplify = TRUE) { + + level.names <- c("order", "suborder", "greatgroup", "subgroup") + + level = match.arg(tolower(trimws(level)), choices = level.names, several.ok = TRUE) level.lut <- 1:4 names(level.lut) <- level.names - + levelid <- level.lut[level] ncharlevel <- levelid - - if (levelid == 4) - ncharlevel <- 4:5 - - needle <- decompose_taxon_code(taxon_to_taxon_code(x)) - res <- sapply(needle, function(y) { - if (length(y) >= levelid && nchar(y[[levelid]]) %in% ncharlevel) { - return(tolower(taxon_code_to_taxon(y[[levelid]]))) - } + + res <- lapply(levelid, function(i) { + if (i == 4) + ncharlevel <- 4:5 + + needle <- decompose_taxon_code(taxon_to_taxon_code(x)) + res <- sapply(needle, function(y) { + if (length(y) >= i && nchar(y[[i]]) %in% ncharlevel) { + return(tolower(taxon_code_to_taxon(y[[i]]))) + } + return(NA_character_) + }) + names(res) <- x + if (length(res) > 0) + return(res) return(NA_character_) }) - names(res) <- x - if(length(res) > 0) - return(res) - return(NA_character_) + + if (length(res) == 1 && simplify) { + return(res[[1]]) + } else { + names(res) <- level + return(as.data.frame(res)) + } } diff --git a/man/getTaxonAtLevel.Rd b/man/getTaxonAtLevel.Rd index 3c6f81c..812ae9b 100644 --- a/man/getTaxonAtLevel.Rd +++ b/man/getTaxonAtLevel.Rd @@ -4,22 +4,24 @@ \alias{getTaxonAtLevel} \title{Get the taxon name at the Soil Order, Suborder, Great Group or Subgroup level} \usage{ -getTaxonAtLevel(x, level = c("order", "suborder", "greatgroup", "subgroup")) +getTaxonAtLevel(x, level = "order", simplify = TRUE) } \arguments{ \item{x}{A character vector containing subgroup-level taxonomic names} \item{level}{one of \code{c("order","suborder","greatgroup","subgroup")}} + +\item{simplify}{Return a vector when \code{level} has length \code{1}? Default: \code{TRUE}. Otherwise, a data.frame is returned.} } \value{ -A named character vector of taxa at specified level, where names are the internal Soil Taxonomy letter codes. +A named character vector of taxa at specified level, where names are the internal Soil Taxonomy letter codes. When \code{length(level) > 1}? a data.frame is returned with column names for each \code{level}. } \description{ Get the taxon name at the Soil Order, Suborder, Great Group or Subgroup level } \examples{ -# default gets the soil order +# default gets the soil order getTaxonAtLevel(c("typic haplargids", "typic glacistels")) #, level = "order") # specify alternate levels @@ -31,4 +33,6 @@ getTaxonAtLevel("udifolists", level = "subgroup") # but can do parents of children getTaxonAtLevel("udifolists", level = "suborder") +# specify multiple levels (returns a list element for each level) +getTaxonAtLevel("hapludolls", c("order", "suborder", "greatgroup", "subgroup")) }