Skip to content

Commit

Permalink
update aggregation methods
Browse files Browse the repository at this point in the history
- implement ignoring specific numeric columns with `ignore_numerics
- add argument`AGGFUN` for column name specific aggregations
- default for categories is to returndominant condition rather than just thickest layer
  • Loading branch information
brownag committed Oct 11, 2024
1 parent 7b30975 commit 494f917
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 10 deletions.
70 changes: 60 additions & 10 deletions R/collapseHz.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
#' @param FUN _function_. A function that returns a _logical_ vector equal in
#' length to the number of horizons in `x`. See details.
#' @param ... Additional arguments passed to the matching function `FUN`.
#' @param AGGFUN _list_. A named list containing custom aggregation functions.
#' List element names should match the column name that they transform. The
#' functions defined should take three arguments: `x` (a vector of horizon
#' property values), `top` (a vector of top depths), and `bottom` (a vector of
#' bottom depths). Default: `NULL` applies weighted.mean() to all numeric
#' columns not listed in `ignore_numerics` and takes the thickest value for all
#' other columns.
#' @param ignore_numerics _character_. Vector of column names that contain numeric
#' values which should _not_ be aggregated using `weighted.mean()`. For example,
#' soil color "value" and "chroma".
#' @param na.rm _logical_. If `TRUE` `NA` values are ignored when calculating
#' min/max boundaries for each group and in weighted averages. If `FALSE` `NA`
#' values are propagated to the result. Default: `FALSE`
Expand All @@ -39,11 +49,33 @@
#' profile_id(a) <- paste0(profile_id(a), "_collapse")
#'
#' plot(c(a, b), color = "clay")
#'
#' # custom aggregation function for matrix_color_munsell
#'
#' a2 <- collapseHz(jacobs2000, c(`A` = "^A",
#' `E` = "E",
#' `Bt` = "[ABC]+t",
#' `C` = "^C",
#' `foo` = "bar"),
#' AGGFUN = list(matrix_color_munsell = function(x, top, bottom) {
#' thk <- bottom - top
#' if (length(x) > 1) {
#' xord <- order(thk, decreasing = TRUE)
#' paste0(paste0(x[xord], " (t=", thk[xord], ")"), collapse = ", ")
#' } else x
#' })
#' )
#' profile_id(a2) <- paste0(profile_id(a), "_collapse_custom")
#'
#' unique(a2$matrix_color_munsell)
#'
collapseHz <- function(x,
pattern,
hzdesgn = hzdesgnname(x, required = TRUE),
FUN = function(x, pattern, hzdesgn, ...) grepl(pattern, x[[hzdesgn]], ignore.case = FALSE),
...,
AGGFUN = NULL,
ignore_numerics = NULL,
na.rm = FALSE) {
idn <- idname(x)
hzd <- horizonDepths(x)
Expand All @@ -60,16 +92,34 @@ collapseHz <- function(x,
if (any(l)) {
r <- rle(l)
g <- unlist(sapply(seq(r$lengths), function(i) rep(i, r$lengths[i])))
res <- h[g %in% unique(g[l]), c(list(hzdeptnew = min(.SD[[hzd[1]]], na.rm = na.rm),
hzdepbnew = max(.SD[[hzd[2]]], na.rm = na.rm)),
lapply(.SD, function(x, top, bottom) {
if (is.numeric(x)) {
weighted.mean(x, bottom - top, na.rm = na.rm)
} else {
x[which.max(bottom - top)[1]]
}
}, .SD[[hzd[1]]], .SD[[hzd[2]]])),
by = g[g %in% unique(g[l])]]
gidx <- g %in% unique(g[l])
res <- h[gidx, c(list(hzdeptnew = min(.SD[[hzd[1]]], na.rm = na.rm),
hzdepbnew = max(.SD[[hzd[2]]], na.rm = na.rm)),
sapply(colnames(.SD)[!colnames(.SD) %in% hzd],
function(n, top, bottom) {
v <- .SD[[n]]
if (n %in% names(AGGFUN)) {

# custom aggregation function (column name specific)
AGGFUN[[n]](v, top, bottom)

} else if (!n %in% ignore_numerics && is.numeric(x)) {

# weighted average by thickness (numerics not in exclusion list)
weighted.mean(v, bottom - top, na.rm = na.rm)

} else {
# take thickest value
# v[which.max(bottom - top)[1]]

# take dominant condition (based on sum of thickness)
cond <- aggregate(bottom - top, by = list(v), sum, na.rm = na.rm)
cond[[1]][which.max(cond[[2]])[1]]
}
},
top = .SD[[hzd[1]]],
bottom = .SD[[hzd[2]]])),
by = g[gidx]]
res$g <- NULL
res[[hzdesgn]] <- labels[p]
h <- h[-which(g %in% unique(g[l])),]
Expand Down
34 changes: 34 additions & 0 deletions man/collapseHz.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 494f917

Please sign in to comment.