From 7b309759838a2529589655a3d17fc06a96940429 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Thu, 10 Oct 2024 16:06:07 -0700 Subject: [PATCH] implement custom matching function argument `FUN` --- R/collapseHz.R | 41 +++++++++++++++++++++++++++++++++-------- man/collapseHz.Rd | 43 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 67 insertions(+), 17 deletions(-) diff --git a/R/collapseHz.R b/R/collapseHz.R index 1d4c45ee..43c8f58a 100644 --- a/R/collapseHz.R +++ b/R/collapseHz.R @@ -1,12 +1,28 @@ #' Collapse Horizons within Profiles Based on Pattern Matching #' -#' Combines and aggregates layers by grouping adjacent horizons that match `pattern` in `hzdesgn`. Numeric properties are combined using the weighted average, and other properties are derived from the thickest horizon in each group. +#' Combines and aggregates data for layers by grouping adjacent horizons that +#' match `pattern` in `hzdesgn`. Numeric properties are combined using the +#' weighted average, and other properties are derived from the thickest horizon +#' in each group. #' #' @param x A _SoilProfileCollection_ -#' @param pattern _character_. A regular expression pattern to match in `hzdesgn` column -#' @param hzdesgn _character_. Any character column containing horizon-level identifiers. Default is estimated using `guessHzDesgnName()`. -#' @param ignore.case _logical_. If `FALSE`, the pattern matching is case sensitive and if `TRUE`, case is ignored during matching. Default: `FALSE` -#' @param na.rm _logical_. If `TRUE` `NA` values are ignored when calculating min/max boundaries for each group and in weighted averages. If `FALSE` `NA` values are propagated to the result. Default: `FALSE` +#' @param pattern _character_. A regular expression pattern to match in `hzdesgn` +#' column +#' @param hzdesgn _character_. Any character column containing horizon-level +#' identifiers. Default is estimated using `guessHzDesgnName()`. +#' @param FUN _function_. A function that returns a _logical_ vector equal in +#' length to the number of horizons in `x`. See details. +#' @param ... Additional arguments passed to the matching function `FUN`. +#' @param na.rm _logical_. If `TRUE` `NA` values are ignored when calculating +#' min/max boundaries for each group and in weighted averages. If `FALSE` `NA` +#' values are propagated to the result. Default: `FALSE` +#' +#' @details +#' +#' If a custom function (`FUN`) is used, it should accept arbitrary additional +#' arguments via an ellipsis (`...`). It is not necessary to do anything with +#' arguments, but the result should match the number of horizons found in the +#' input SoilProfileCollection `x`. #' #' @return A _SoilProfileCollection_ #' @export @@ -14,12 +30,21 @@ #' @examples #' data(jacobs2000) #' -#' a <- collapseHz(jacobs2000, c(`A` = "^A", `E` = "E", `Bt` = "[ABC]+t", `C` = "^C", `foo` = "bar")) +#' a <- collapseHz(jacobs2000, c(`A` = "^A", +#' `E` = "E", +#' `Bt` = "[ABC]+t", +#' `C` = "^C", +#' `foo` = "bar")) #' b <- jacobs2000 #' profile_id(a) <- paste0(profile_id(a), "_collapse") #' #' plot(c(a, b), color = "clay") -collapseHz <- function(x, pattern, hzdesgn = guessHzDesgnName(x, required = TRUE), ignore.case = FALSE, na.rm = FALSE) { +collapseHz <- function(x, + pattern, + hzdesgn = hzdesgnname(x, required = TRUE), + FUN = function(x, pattern, hzdesgn, ...) grepl(pattern, x[[hzdesgn]], ignore.case = FALSE), + ..., + na.rm = FALSE) { idn <- idname(x) hzd <- horizonDepths(x) if (!is.null(names(pattern))) { @@ -31,7 +56,7 @@ collapseHz <- function(x, pattern, hzdesgn = guessHzDesgnName(x, required = TRUE } for (p in seq(pattern)) { h <- data.table::data.table(horizons(x)) - l <- grepl(pattern[p], h[[hzdesgn]], ignore.case = ignore.case) + l <- FUN(x, pattern = pattern[p], hzdesgn = hzdesgn, na.rm = na.rm, ...) if (any(l)) { r <- rle(l) g <- unlist(sapply(seq(r$lengths), function(i) rep(i, r$lengths[i]))) diff --git a/man/collapseHz.Rd b/man/collapseHz.Rd index 1f461a1c..e03ed1c8 100644 --- a/man/collapseHz.Rd +++ b/man/collapseHz.Rd @@ -7,29 +7,54 @@ collapseHz( x, pattern, - hzdesgn = guessHzDesgnName(x, required = TRUE), - ignore.case = FALSE, + hzdesgn = hzdesgnname(x, required = TRUE), + FUN = function(x, pattern, hzdesgn, ...) grepl(pattern, x[[hzdesgn]], ignore.case = + FALSE), + ..., na.rm = FALSE ) } \arguments{ -\item{x}{A SoilProfileCollection} +\item{x}{A \emph{SoilProfileCollection}} -\item{pattern}{character. A regular expression pattern to match in \code{hzdesgn} column} +\item{pattern}{\emph{character}. A regular expression pattern to match in \code{hzdesgn} +column} -\item{hzdesgn}{character. Any character column containing horizon-level identifiers. Default is estimated using \code{guessHzDesgnName()}.} +\item{hzdesgn}{\emph{character}. Any character column containing horizon-level +identifiers. Default is estimated using \code{guessHzDesgnName()}.} -\item{ignore.case}{logical. If \code{FALSE}, the pattern matching is case sensitive and if \code{TRUE}, case is ignored during matching. Default: \code{FALSE}} +\item{FUN}{\emph{function}. A function that returns a \emph{logical} vector equal in +length to the number of horizons in \code{x}. See details.} -\item{na.rm}{logical. If \code{TRUE} \code{NA} values are ignored when calculating min/max boundaries for each group and in weighted averages. If \code{FALSE} \code{NA} values are propagated to the result. Default: \code{FALSE}} +\item{...}{Additional arguments passed to the matching function \code{FUN}.} + +\item{na.rm}{\emph{logical}. If \code{TRUE} \code{NA} values are ignored when calculating +min/max boundaries for each group and in weighted averages. If \code{FALSE} \code{NA} +values are propagated to the result. Default: \code{FALSE}} +} +\value{ +A \emph{SoilProfileCollection} } \description{ -Combines and aggregates layers by grouping adjacent horizons that match \code{pattern} in \code{hzdesgn}. Numeric properties are combined using the weighted average, and other properties are derived from the thickest horizon in each group. +Combines and aggregates data for layers by grouping adjacent horizons that +match \code{pattern} in \code{hzdesgn}. Numeric properties are combined using the +weighted average, and other properties are derived from the thickest horizon +in each group. +} +\details{ +If a custom function (\code{FUN}) is used, it should accept arbitrary additional +arguments via an ellipsis (\code{...}). It is not necessary to do anything with +arguments, but the result should match the number of horizons found in the +input SoilProfileCollection \code{x}. } \examples{ data(jacobs2000) -a <- collapseHz(jacobs2000, c(`A` = "^A", `E` = "E", `Bt` = "[ABC]+t", `C` = "^C", `foo` = "bar")) +a <- collapseHz(jacobs2000, c(`A` = "^A", + `E` = "E", + `Bt` = "[ABC]+t", + `C` = "^C", + `foo` = "bar")) b <- jacobs2000 profile_id(a) <- paste0(profile_id(a), "_collapse")