diff --git a/R/collapseHz.R b/R/collapseHz.R index 43c8f58a..53812193 100644 --- a/R/collapseHz.R +++ b/R/collapseHz.R @@ -13,6 +13,16 @@ #' @param FUN _function_. A function that returns a _logical_ vector equal in #' length to the number of horizons in `x`. See details. #' @param ... Additional arguments passed to the matching function `FUN`. +#' @param AGGFUN _list_. A named list containing custom aggregation functions. +#' List element names should match the column name that they transform. The +#' functions defined should take three arguments: `x` (a vector of horizon +#' property values), `top` (a vector of top depths), and `bottom` (a vector of +#' bottom depths). Default: `NULL` applies weighted.mean() to all numeric +#' columns not listed in `ignore_numerics` and takes the thickest value for all +#' other columns. +#' @param ignore_numerics _character_. Vector of column names that contain numeric +#' values which should _not_ be aggregated using `weighted.mean()`. For example, +#' soil color "value" and "chroma". #' @param na.rm _logical_. If `TRUE` `NA` values are ignored when calculating #' min/max boundaries for each group and in weighted averages. If `FALSE` `NA` #' values are propagated to the result. Default: `FALSE` @@ -39,11 +49,33 @@ #' profile_id(a) <- paste0(profile_id(a), "_collapse") #' #' plot(c(a, b), color = "clay") +#' +#' # custom aggregation function for matrix_color_munsell +#' +#' a2 <- collapseHz(jacobs2000, c(`A` = "^A", +#' `E` = "E", +#' `Bt` = "[ABC]+t", +#' `C` = "^C", +#' `foo` = "bar"), +#' AGGFUN = list(matrix_color_munsell = function(x, top, bottom) { +#' thk <- bottom - top +#' if (length(x) > 1) { +#' xord <- order(thk, decreasing = TRUE) +#' paste0(paste0(x[xord], " (t=", thk[xord], ")"), collapse = ", ") +#' } else x +#' }) +#' ) +#' profile_id(a2) <- paste0(profile_id(a), "_collapse_custom") +#' +#' unique(a2$matrix_color_munsell) +#' collapseHz <- function(x, pattern, hzdesgn = hzdesgnname(x, required = TRUE), FUN = function(x, pattern, hzdesgn, ...) grepl(pattern, x[[hzdesgn]], ignore.case = FALSE), ..., + AGGFUN = NULL, + ignore_numerics = NULL, na.rm = FALSE) { idn <- idname(x) hzd <- horizonDepths(x) @@ -60,16 +92,34 @@ collapseHz <- function(x, if (any(l)) { r <- rle(l) g <- unlist(sapply(seq(r$lengths), function(i) rep(i, r$lengths[i]))) - res <- h[g %in% unique(g[l]), c(list(hzdeptnew = min(.SD[[hzd[1]]], na.rm = na.rm), - hzdepbnew = max(.SD[[hzd[2]]], na.rm = na.rm)), - lapply(.SD, function(x, top, bottom) { - if (is.numeric(x)) { - weighted.mean(x, bottom - top, na.rm = na.rm) - } else { - x[which.max(bottom - top)[1]] - } - }, .SD[[hzd[1]]], .SD[[hzd[2]]])), - by = g[g %in% unique(g[l])]] + gidx <- g %in% unique(g[l]) + res <- h[gidx, c(list(hzdeptnew = min(.SD[[hzd[1]]], na.rm = na.rm), + hzdepbnew = max(.SD[[hzd[2]]], na.rm = na.rm)), + sapply(colnames(.SD)[!colnames(.SD) %in% hzd], + function(n, top, bottom) { + v <- .SD[[n]] + if (n %in% names(AGGFUN)) { + + # custom aggregation function (column name specific) + AGGFUN[[n]](v, top, bottom) + + } else if (!n %in% ignore_numerics && is.numeric(x)) { + + # weighted average by thickness (numerics not in exclusion list) + weighted.mean(v, bottom - top, na.rm = na.rm) + + } else { + # take thickest value + # v[which.max(bottom - top)[1]] + + # take dominant condition (based on sum of thickness) + cond <- aggregate(bottom - top, by = list(v), sum, na.rm = na.rm) + cond[[1]][which.max(cond[[2]])[1]] + } + }, + top = .SD[[hzd[1]]], + bottom = .SD[[hzd[2]]])), + by = g[gidx]] res$g <- NULL res[[hzdesgn]] <- labels[p] h <- h[-which(g %in% unique(g[l])),] diff --git a/man/collapseHz.Rd b/man/collapseHz.Rd index e03ed1c8..839a3d77 100644 --- a/man/collapseHz.Rd +++ b/man/collapseHz.Rd @@ -11,6 +11,8 @@ collapseHz( FUN = function(x, pattern, hzdesgn, ...) grepl(pattern, x[[hzdesgn]], ignore.case = FALSE), ..., + AGGFUN = NULL, + ignore_numerics = NULL, na.rm = FALSE ) } @@ -28,6 +30,18 @@ length to the number of horizons in \code{x}. See details.} \item{...}{Additional arguments passed to the matching function \code{FUN}.} +\item{AGGFUN}{\emph{list}. A named list containing custom aggregation functions. +List element names should match the column name that they transform. The +functions defined should take three arguments: \code{x} (a vector of horizon +property values), \code{top} (a vector of top depths), and \code{bottom} (a vector of +bottom depths). Default: \code{NULL} applies weighted.mean() to all numeric +columns not listed in \code{ignore_numerics} and takes the thickest value for all +other columns.} + +\item{ignore_numerics}{\emph{character}. Vector of column names that contain numeric +values which should \emph{not} be aggregated using \code{weighted.mean()}. For example, +soil color "value" and "chroma".} + \item{na.rm}{\emph{logical}. If \code{TRUE} \code{NA} values are ignored when calculating min/max boundaries for each group and in weighted averages. If \code{FALSE} \code{NA} values are propagated to the result. Default: \code{FALSE}} @@ -59,4 +73,24 @@ b <- jacobs2000 profile_id(a) <- paste0(profile_id(a), "_collapse") plot(c(a, b), color = "clay") + +# custom aggregation function for matrix_color_munsell + +a2 <- collapseHz(jacobs2000, c(`A` = "^A", + `E` = "E", + `Bt` = "[ABC]+t", + `C` = "^C", + `foo` = "bar"), + AGGFUN = list(matrix_color_munsell = function(x, top, bottom) { + thk <- bottom - top + if (length(x) > 1) { + xord <- order(thk, decreasing = TRUE) + paste0(paste0(x[xord], " (t=", thk[xord], ")"), collapse = ", ") + } else x + }) + ) +profile_id(a2) <- paste0(profile_id(a), "_collapse_custom") + +unique(a2$matrix_color_munsell) + }