diff --git a/NAMESPACE b/NAMESPACE index b2e3032..6c6692e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,12 +32,13 @@ exportMethods("%power%") exportMethods("[") exportMethods("[<-") exportMethods("[[<-") +exportMethods("set_groups<-") exportMethods("set_totals<-") exportMethods(aggregate) +exportMethods(any_assigned) exportMethods(as_amounts) exportMethods(as_composition) exportMethods(as_graph) -exportMethods(augment) exportMethods(barplot) exportMethods(closure) exportMethods(condense) @@ -47,6 +48,7 @@ exportMethods(dist) exportMethods(get_groups) exportMethods(get_totals) exportMethods(hist) +exportMethods(is_assigned) exportMethods(mahalanobis) exportMethods(margin) exportMethods(mean) @@ -120,5 +122,4 @@ importFrom(utils,tail) importMethodsFrom(arkhe,describe) importMethodsFrom(arkhe,replace_NA) importMethodsFrom(arkhe,replace_zero) -importMethodsFrom(dimensio,augment) importMethodsFrom(dimensio,pca) diff --git a/R/AllClasses.R b/R/AllClasses.R index 7a06384..3d1293b 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -46,7 +46,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' An S4 class to represent compositional data. #' @slot totals A [`numeric`] vector to store the absolute row sums (before #' the closure of the compositions). -#' @slot extra A [`list`] of extra variables. +#' @slot groups A [`character`] vector to store the group names. #' @section Coerce: #' In the code snippets below, `x` is a `CompositionMatrix` object. #' \describe{ @@ -70,7 +70,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "CompositionMatrix", slots = c( totals = "numeric", - extra = "list" + groups = "character" ), contains = c("NumericMatrix") ) @@ -81,7 +81,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' S4 classes to represent log-ratio data transformations. #' @slot totals A [`numeric`] vector to store the absolute row sums (before #' the closure of the compositions). -#' @slot extra A [`list`] of extra variables. +#' @slot groups A [`character`] vector to store the group names. #' @slot parts A [`character`] vector to store the part names. #' @slot ratio A [`character`] vector to store the ratio names. #' @slot order An [`integer`] vector to store the original ordering of the @@ -107,7 +107,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "LogRatio", slots = c( totals = "numeric", - extra = "list", + groups = "character", parts = "character", ratio = "character", diff --git a/R/AllGenerics.R b/R/AllGenerics.R index c874a13..3828556 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -10,7 +10,6 @@ setGeneric("mahalanobis", package = "stats") #' @importMethodsFrom arkhe describe #' @importMethodsFrom arkhe replace_NA #' @importMethodsFrom arkhe replace_zero -#' @importMethodsFrom dimensio augment #' @importMethodsFrom dimensio pca NULL @@ -22,6 +21,8 @@ NULL #' @param parts A `vector` giving the index of the column to be used a #' compositional parts. If `NULL` (the default), all [`double`] columns will be #' used. +#' @param groups An [`integer`] giving the index of the column to be used to +#' group the samples. If `NULL` (the default), no grouping is stored. #' @param verbose A [`logical`] scalar: should \R report extra information #' on progress? #' @param ... Currently not used. @@ -55,21 +56,6 @@ setGeneric( valueClass = "matrix" ) -#' Augment Data with Extra Columns -#' -#' Adds columns from the original data. -#' @param x A [`CompositionMatrix-class`] or [`LogRatio-class`] object. -#' @param ... Currently not used. -#' @return -#' A [`data.frame`]. -#' @example inst/examples/ex-augment.R -#' @author N. Frerebeau -#' @docType methods -#' @family compositional data tools -#' @name augment -#' @rdname augment -NULL - #' Data Description #' #' Describes an object. @@ -202,9 +188,15 @@ setGeneric( #' #' Retrieves or defines the groups to which the observations belong. #' @param x An object from which to get or set `groups`. -# @param value A possible value for the `groups` of `x`. +#' @param value A possible value for the `groups` of `x`. #' @return +#' * `set_groups()` returns an object of the same sort as `x` with the new +#' group names assigned. #' * `get_groups()` returns a [`character`] vector giving the group names of `x`. +#' * `any_assigned()` returns a [`logical`] scalar specifying whether or not `x` +#' has groups. +#' * `is_assigned()` returns a [`logical`] vector specifying whether or not an +#' observation belongs to a group. #' @example inst/examples/ex-mutators.R #' @author N. Frerebeau #' @docType methods @@ -213,6 +205,20 @@ setGeneric( #' @rdname groups NULL +#' @rdname groups +#' @aliases any_assigned-method +setGeneric( + name = "any_assigned", + def = function(x) standardGeneric("any_assigned") +) + +#' @rdname groups +#' @aliases is_assigned-method +setGeneric( + name = "is_assigned", + def = function(x) standardGeneric("is_assigned") +) + #' @rdname groups #' @aliases get_groups-method setGeneric( @@ -220,6 +226,13 @@ setGeneric( def = function(x) standardGeneric("get_groups") ) +#' @rdname groups +#' @aliases set_groups-method +setGeneric( + name = "set_groups<-", + def = function(x, value) standardGeneric("set_groups<-") +) + #' Row Sums #' #' Retrieves or defines the row sums (before closure). @@ -621,8 +634,6 @@ NULL #' @param x A [`CompositionMatrix-class`] object. #' @param by A `vector` of grouping elements, as long as the variables in `x` #' (in the sense that [`as.factor(by)`][as.factor()] defines the grouping). -#' If a single `character` string is passed, it must be the name of a -#' categorical variable from the original dataset. #' @param ... Further arguments to be passed to [mean()]. #' @return A [`CompositionMatrix-class`] object. #' @seealso [mean()], [aggregate()] @@ -881,9 +892,7 @@ NULL #' Displays a compositional bar chart. #' @param height A [`CompositionMatrix-class`] object. #' @param groups A `vector` of grouping elements, as long as the variables in -#' `height`. If a single `character` string is passed, it must be the name of a -#' categorical variable from the original dataset. -#' If set, a matrix of panels defined by `groups` will be drawn. +#' `height`. If set, a matrix of panels defined by `groups` will be drawn. #' @param order An [`integer`] vector giving the index of the column to be used #' for the ordering of the data. #' @param decreasing A [`logical`] scalar: should the sort order be increasing @@ -955,8 +964,7 @@ NULL #' Displays a matrix of ternary plots. #' @param x A [`CompositionMatrix-class`] object. #' @param groups A `vector` of grouping elements, as long as the variables in -#' `x`. If a single `character` string is passed, it must be the name of a -#' categorical variable from the original dataset. +#' `x`. #' @param palette_color A palette [`function`] that when called with a single #' argument (`groups`) returns a `character` vector of colors. #' @param palette_symbol A palette [`function`] that when called with a single @@ -980,9 +988,7 @@ NULL #' Displays a density plot. #' @param x A [`LogRatio-class`] object. #' @param groups A `vector` of grouping elements, as long as the variables in -#' `x`. If a single `character` string is passed, it must be the name of a -#' categorical variable from the original dataset. -#' If set, a matrix of panels defined by `groups` will be drawn. +#' `x`. If set, a matrix of panels defined by `groups` will be drawn. #' @param palette_color A palette [`function`] that when called with a single #' argument (`groups`) returns a `character` vector of colors. #' @param rug A [`logical`] scalar: should a *rug* representation (1-d plot) of @@ -1159,8 +1165,7 @@ NULL #' with larger (squared) Mahalanobis distance are considered as potential #' outliers. #' @param groups A `vector` of grouping elements, as long as the variables in -#' `object`. If a single `character` string is passed, it must be the name of a -#' categorical variable from the original dataset. +#' `object`. #' @details #' An outlier can be defined as having a very large Mahalanobis distance from #' all observations. In this way, a certain proportion of the observations can diff --git a/R/barplot.R b/R/barplot.R index b8cdbf8..8d08353 100644 --- a/R/barplot.R +++ b/R/barplot.R @@ -5,7 +5,7 @@ NULL # CompositionMatrix ============================================================ #' @export #' @method barplot CompositionMatrix -barplot.CompositionMatrix <- function(height, ..., groups = NULL, +barplot.CompositionMatrix <- function(height, ..., groups = get_groups(height), order = NULL, decreasing = FALSE, horiz = TRUE, xlab = NULL, ylab = NULL, @@ -36,8 +36,7 @@ barplot.CompositionMatrix <- function(height, ..., groups = NULL, y_side <- if (horiz) 2 else 1 ## Grouping - groups <- get_variable(height, which = groups) - if (!is.null(groups) && !all(is.na(groups))) { + if (has_groups(groups)) { arkhe::assert_length(groups, nrow(z)) groups <- groups[ordering] diff --git a/R/coerce.R b/R/coerce.R index 4993a8e..44e7210 100644 --- a/R/coerce.R +++ b/R/coerce.R @@ -31,7 +31,8 @@ setMethod( totals <- rowSums(from, na.rm = TRUE) from <- from / totals - .CompositionMatrix(from, totals = unname(totals)) + grp <- rep(NA_character_, nrow(from)) + .CompositionMatrix(from, totals = unname(totals), groups = grp) } ) @@ -41,13 +42,21 @@ setMethod( setMethod( f = "as_composition", signature = c(from = "data.frame"), - definition = function(from, parts = NULL, + definition = function(from, parts = NULL, groups = NULL, verbose = getOption("nexus.verbose")) { ## Clean row/column names lab <- make_names(x = NULL, n = nrow(from), prefix = "S") rownames(from) <- if (has_rownames(from)) rownames(from) else lab colnames(from) <- make_names(x = colnames(from), n = ncol(from), prefix = "V") + ## Group names + grp <- rep(NA_character_, nrow(from)) + if (!is.null(groups)) { + grp <- from[, groups, drop = FALSE] + grp <- as.character(interaction(grp, sep = "_")) + grp[grp == ""] <- NA_character_ + } + ## Remove non-numeric columns if (is.null(parts)) { parts <- arkhe::detect(from, f = is.double, margin = 2) # Logical @@ -63,7 +72,6 @@ setMethod( if (is.character(parts)) parts <- colnames(from) %in% parts } coda <- from[, parts, drop = FALSE] - extra <- from[, !parts, drop = FALSE] arkhe::assert_filled(coda) ## Build matrix @@ -71,7 +79,7 @@ setMethod( totals <- rowSums(coda, na.rm = TRUE) coda <- coda / totals - .CompositionMatrix(coda, totals = unname(totals), extra = as.list(extra)) + .CompositionMatrix(coda, totals = unname(totals), groups = grp) } ) @@ -87,37 +95,6 @@ setMethod( } ) -# To features ================================================================== -#' @export -#' @rdname augment -#' @aliases augment,CompositionMatrix-method -setMethod( - f = "augment", - signature = c(x = "CompositionMatrix"), - definition = function(x, ...) { - if (has_extra(x)) { - data.frame(get_extra(x), x) - } else { - as.data.frame(x) - } - } -) - -#' @export -#' @rdname augment -#' @aliases augment,LogRatio-method -setMethod( - f = "augment", - signature = c(x = "LogRatio"), - definition = function(x, ...) { - if (has_extra(x)) { - data.frame(get_extra(x), x) - } else { - as.data.frame(x) - } - } -) - # To data.frame ================================================================ #' @method as.data.frame CompositionMatrix #' @export diff --git a/R/condense.R b/R/condense.R index 79cb353..a648159 100644 --- a/R/condense.R +++ b/R/condense.R @@ -8,11 +8,10 @@ NULL setMethod( f = "condense", signature = c("CompositionMatrix"), - definition = function(x, by, ...) { + definition = function(x, by = get_groups(x), ...) { m <- nrow(x) ## Grouping - by <- get_variable(x, which = by) arkhe::assert_length(by, m) by <- as.factor(by) @@ -29,8 +28,9 @@ setMethod( z <- do.call(rbind, z) tot <- tapply(X = get_totals(x), INDEX = by, FUN = mean, simplify = TRUE) + grp <- flatten_chr(x = get_groups(x), by = by) rownames(z) <- levels(by) - .CompositionMatrix(z, totals = as.numeric(tot)) + .CompositionMatrix(z, totals = as.numeric(tot), groups = grp) } ) diff --git a/R/describe.R b/R/describe.R index 21e7b0c..bbf3230 100644 --- a/R/describe.R +++ b/R/describe.R @@ -40,7 +40,17 @@ setMethod( spa <- arkhe::sparsity(x, count = FALSE) msg_spa <- sprintf("%s of values are zero.", label_percent(spa, digits = 1)) - cat(msg_tbl, sep = "\n* ") + ## Groups + groups <- get_groups(x) + grp <- unique(groups[!is.na(groups)]) + n_grp <- length(grp) + n_ung <- sum(is.na(groups)) + ls_grp <- if (n_grp == 0) "" else paste0(": ", paste0(dQuote(grp), collapse = ", ")) + msg_grp <- sprintf("%d %s%s.", n_grp, ngettext(n_grp, "group", "groups"), + ls_grp) + msg_ung <- sprintf("%d unassigned %s.", n_ung, ngettext(n_ung, "sample", "samples")) + + cat(msg_tbl, msg_grp, msg_ung, sep = "\n* ") cat("\nData checking:", msg_spa, msg_col_var, sep = "\n* ") cat("\nMissing values:", msg_row_NA, msg_col_NA, sep = "\n* ") diff --git a/R/mutators.R b/R/mutators.R index c90d46e..7f909a7 100644 --- a/R/mutators.R +++ b/R/mutators.R @@ -14,34 +14,65 @@ get_transformation <- function(x) { ) } -# Supplementary variables ====================================================== -get_extra <- function(x) { - x@extra +# Groups ======================================================================= +has_groups <- function(x) { + length(x) > 0 && !all(is.na(x)) } -has_extra <- function(x) { - extra <- get_extra(x) - length(extra) > 0 && all(lengths(extra) > 0) -} +#' @export +#' @rdname groups +#' @aliases is_assigned,CompositionMatrix-method +setMethod("is_assigned", "CompositionMatrix", function(x) !is.na(get_groups(x))) -get_variable <- function(x, which = NULL) { - if (is.character(which) && length(which) == 1) { - extra <- get_extra(x)[[which]] - if (is.null(extra)) { - warning(sprintf("There is no such variable: %s.", which), call. = FALSE) - } - return(extra) - } - which -} +#' @export +#' @rdname groups +#' @aliases is_assigned,LogRatio-method +setMethod("is_assigned", "LogRatio", function(x) !is.na(get_groups(x))) + +#' @export +#' @rdname groups +#' @aliases any_assigned,CompositionMatrix-method +setMethod("any_assigned", "CompositionMatrix", function(x) any(is_assigned(x))) + +#' @export +#' @rdname groups +#' @aliases any_assigned,LogRatio-method +setMethod("any_assigned", "LogRatio", function(x) any(is_assigned(x))) + +#' @export +#' @rdname groups +#' @aliases get_groups,CompositionMatrix-method +setMethod("get_groups", "CompositionMatrix", function(x) x@groups) + +#' @export +#' @rdname groups +#' @aliases get_groups,LogRatio-method +setMethod("get_groups", "LogRatio", function(x) x@groups) -# Groups ======================================================================= -# is_assigned any_assigned #' @export #' @rdname groups #' @aliases get_groups,OutlierIndex-method setMethod("get_groups", "OutlierIndex", function(x) x@groups) +#' @export +#' @rdname groups +#' @aliases set_groups,CompositionMatrix-method +setMethod( + f = "set_groups<-", + signature = "CompositionMatrix", + definition = function(x, value) { + if (is.null(value)) { + x@groups <- rep(NA_character_, nrow(x)) + } else { + value <- as.character(value) + value[value == ""] <- NA_character_ + x@groups <- value + } + methods::validObject(x) + x + } +) + # Totals ======================================================================= #' @export #' @rdname totals diff --git a/R/nexus-internal.R b/R/nexus-internal.R index 28a913a..eb9e6ef 100644 --- a/R/nexus-internal.R +++ b/R/nexus-internal.R @@ -9,6 +9,18 @@ has_rownames <- function(x) { !is.na(.row_names_info(x, type = 0L)[[1L]]) } +make_names <- function(x, n = length(x), prefix = "X") { + x <- if (n > 0) x %||% paste0(prefix, seq_len(n)) else character(0) + x <- make.unique(x, sep = "_") + x +} + +flatten_chr <- function(x, by) { + z <- tapply(X = x, INDEX = by, FUN = unique, simplify = FALSE) + z <- vapply(X = z, FUN = paste0, FUN.VALUE = character(1), collapse = ":") + z +} + #' Label Percentages #' #' @param x A [`numeric`] vector. @@ -29,38 +41,3 @@ label_percent <- function(x, digits = NULL, trim = FALSE) { x[i] <- y x } - -flatten_chr <- function(x, by) { - z <- tapply(X = x, INDEX = by, FUN = unique, simplify = FALSE) - z <- vapply(X = z, FUN = paste0, FUN.VALUE = character(1), collapse = ":") - z -} - -is_replicated <- function(x) { - duplicated(x, fromLast = FALSE) | duplicated(x, fromLast = TRUE) -} -any_replicated <- function(x) { - any(is_replicated(x)) -} - -make_codes <- function(x) { - if (!any(duplicated(x))) return(x) - x <- split(x = seq_along(x), f = x) - nm <- rep(names(x), lengths(x)) - nm <- tapply( - X = nm, - INDEX = nm, - FUN = function(x) paste(x, seq_along(x), sep = "_"), - simplify = FALSE - ) - - x <- unlist(x, use.names = FALSE) - nm <- unlist(nm, use.names = FALSE) - nm[x] -} - -make_names <- function(x, n = length(x), prefix = "X") { - x <- if (n > 0) x %||% paste0(prefix, seq_len(n)) else character(0) - x <- make.unique(x, sep = "_") - x -} diff --git a/R/outliers.R b/R/outliers.R index 2cd1907..19af1ce 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -9,7 +9,7 @@ NULL setMethod( f = "outliers", signature = c(object = "CompositionMatrix"), - definition = function(object, ..., groups = NULL, + definition = function(object, ..., groups = get_groups(object), method = c("mve", "mcd"), quantile = 0.975) { ## Transformation z <- transform_ilr(object) @@ -17,14 +17,13 @@ setMethod( ## Grouping m <- nrow(z) p <- ncol(z) - groups <- get_variable(object, which = groups) - if (is.null(groups) || all(is.na(groups))) { - grp <- list(z) - groups <- list(seq_len(m)) - } else { + if (has_groups(groups)) { arkhe::assert_length(groups, m) grp <- split(z, f = groups) groups <- split(seq_len(m), f = groups) + } else { + grp <- list(z) + groups <- list(seq_len(m)) } ## Clean diff --git a/R/pca.R b/R/pca.R index 4978d64..ba4dfa7 100644 --- a/R/pca.R +++ b/R/pca.R @@ -24,8 +24,6 @@ setMethod( definition = function(object, center = TRUE, scale = FALSE, rank = NULL, sup_row = NULL, sup_col = NULL, weight_row = NULL, weight_col = NULL) { - z <- methods::callNextMethod() - z@extra <- get_extra(object) - z + methods::callNextMethod() } ) diff --git a/R/plot.R b/R/plot.R index f933968..137cbae 100644 --- a/R/plot.R +++ b/R/plot.R @@ -5,12 +5,11 @@ NULL # CompositionMatrix ============================================================ #' @export #' @method plot CompositionMatrix -plot.CompositionMatrix <- function(x, ..., margin = NULL, groups = NULL, +plot.CompositionMatrix <- function(x, ..., margin = NULL, groups = get_groups(x), palette_color = palette_color_discrete(), palette_symbol = palette_shape()) { ## Grouping - groups <- get_variable(x, which = groups) - if (!is.null(groups) && !all(is.na(groups))) { + if (has_groups(groups)) { arkhe::assert_length(groups, nrow(x)) col <- palette_color(groups) pch <- palette_symbol(groups) @@ -31,7 +30,7 @@ setMethod("plot", c(x = "CompositionMatrix", y = "missing"), plot.CompositionMat # LogRatio ===================================================================== #' @export #' @method plot LogRatio -plot.LogRatio <- function(x, ..., groups = NULL, +plot.LogRatio <- function(x, ..., groups = get_groups(x), palette_color = palette_color_discrete(), rug = TRUE, ticksize = 0.05, ncol = NULL, flip = FALSE, @@ -49,16 +48,15 @@ plot.LogRatio <- function(x, ..., groups = NULL, nrow <- ceiling(p / ncol) ## Grouping - groups <- get_variable(x, which = groups) - if (is.null(groups) || all(is.na(groups))) { - grp <- list(all = z) - groups <- rep("all", m) - border <- list(...)$border %||% graphics::par("col") - } else { + if (has_groups(groups)) { arkhe::assert_length(groups, m) grp <- split(z, f = groups) border <- palette_color(names(grp)) rug <- FALSE + } else { + grp <- list(all = z) + groups <- rep("all", m) + border <- list(...)$border %||% graphics::par("col") } k <- length(grp) diff --git a/R/simplex.R b/R/simplex.R index 554b5f7..2243725 100644 --- a/R/simplex.R +++ b/R/simplex.R @@ -40,7 +40,7 @@ setMethod( z <- x * y z <- as_composition(z) - z@extra <- get_extra(x) + set_groups(z) <- get_groups(x) z } @@ -92,7 +92,7 @@ setMethod( arkhe::assert_length(y, 1L) z <- x ^ y z <- as_composition(z) - z@extra <- get_extra(x) + set_groups(z) <- get_groups(x) z } ) diff --git a/R/subset.R b/R/subset.R index a7cab63..4eedb2b 100644 --- a/R/subset.R +++ b/R/subset.R @@ -12,11 +12,7 @@ NULL if (missing(i)) i <- seq_len(nrow(x)) if (is.character(i)) i <- match(i, dimnames(x)[1L]) totals <- get_totals(x)[i] - if (has_extra(x)) { - extra <- lapply(X = get_extra(x), FUN = function(val, i) { val[i] }, i = i) - } else { - extra <- list() - } + groups <- get_groups(x)[i] ## Columns if (missing(j)) j <- seq_len(ncol(x)) @@ -32,7 +28,7 @@ NULL # z <- z / tot # } - methods::initialize(x, z, totals = totals, extra = extra) + methods::initialize(x, z, totals = totals, groups = groups) } wrong_dimensions <- function(i, j) { diff --git a/R/transform_alr.R b/R/transform_alr.R index 5f24b5a..a2e50e2 100644 --- a/R/transform_alr.R +++ b/R/transform_alr.R @@ -37,7 +37,7 @@ setMethod( base = base, weights = w, totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) diff --git a/R/transform_clr.R b/R/transform_clr.R index e5752f3..f9b68f7 100644 --- a/R/transform_clr.R +++ b/R/transform_clr.R @@ -32,7 +32,7 @@ setMethod( base = base, weights = w, totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) diff --git a/R/transform_ilr.R b/R/transform_ilr.R index 6a15638..0d75a1e 100644 --- a/R/transform_ilr.R +++ b/R/transform_ilr.R @@ -69,7 +69,7 @@ setMethod( base = base, weights = rep(1 / D, D), totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) diff --git a/R/transform_inverse.R b/R/transform_inverse.R index 49af5ce..73b4e12 100644 --- a/R/transform_inverse.R +++ b/R/transform_inverse.R @@ -19,7 +19,7 @@ setMethod( .CompositionMatrix( y, totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) @@ -43,7 +43,7 @@ setMethod( .CompositionMatrix( y, totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) @@ -66,7 +66,7 @@ setMethod( .CompositionMatrix( y, totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) diff --git a/R/transform_lr.R b/R/transform_lr.R index c9ac59d..40b751e 100644 --- a/R/transform_lr.R +++ b/R/transform_lr.R @@ -37,7 +37,7 @@ setMethod( order = seq_len(J), weights = w, totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) diff --git a/R/transform_plr.R b/R/transform_plr.R index a62ab0f..5d6527f 100644 --- a/R/transform_plr.R +++ b/R/transform_plr.R @@ -49,7 +49,7 @@ setMethod( base = H, weights = rep(1 / J, J), totals = get_totals(object), - extra = get_extra(object) + groups = get_groups(object) ) } ) diff --git a/R/validate.R b/R/validate.R index 17828bf..30de0da 100644 --- a/R/validate.R +++ b/R/validate.R @@ -18,11 +18,11 @@ setValidity( ## Get data n <- nrow(object) totals <- object@totals - extra <- object@extra + groups <- object@groups cnd <- list( arkhe::validate(arkhe::assert_length(totals, n, empty = FALSE)), - # arkhe::validate(arkhe::assert_lengths(extra, n)), + arkhe::validate(arkhe::assert_length(groups, n, empty = FALSE)), arkhe::validate(arkhe::assert_positive(object, strict = FALSE, na.rm = TRUE)) ) arkhe::check_class(object, cnd) @@ -41,7 +41,7 @@ setValidity( weights <- object@weights totals <- object@totals - extra <- object@extra + groups <- object@groups n <- nrow(object) m <- length(parts) @@ -50,8 +50,7 @@ setValidity( arkhe::validate(arkhe::assert_missing(object)), arkhe::validate(arkhe::assert_infinite(object)), arkhe::validate(arkhe::assert_length(totals, n, empty = FALSE)), - # arkhe::validate(arkhe::assert_lengths(extra, n)), - + arkhe::validate(arkhe::assert_length(groups, n, empty = FALSE)), arkhe::validate(arkhe::assert_length(order, m, empty = FALSE)) ) arkhe::check_class(object, cnd) diff --git a/inst/examples/ex-augment.R b/inst/examples/ex-augment.R deleted file mode 100644 index a8c1e08..0000000 --- a/inst/examples/ex-augment.R +++ /dev/null @@ -1,11 +0,0 @@ -## Data from Aitchison 1986 -data("arctic") - -## Coerce to compositional data -coda <- as_composition(arctic, parts = 1:3) - -## Isometric log-ratio -ilr <- transform_ilr(coda) - -## Add extra column -augment(ilr) diff --git a/inst/examples/ex-barplot.R b/inst/examples/ex-barplot.R index 9747196..18e58cb 100644 --- a/inst/examples/ex-barplot.R +++ b/inst/examples/ex-barplot.R @@ -10,8 +10,8 @@ barplot(coda, order = 2) ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos) +coda <- as_composition(kommos, groups = 1) ## Use ceramic types for grouping -barplot(coda, groups = "type", order = 1) -barplot(coda, groups = "type", order = 1, horiz = FALSE) +barplot(coda, order = 1) +barplot(coda, order = 1, horiz = FALSE) diff --git a/inst/examples/ex-condense.R b/inst/examples/ex-condense.R index 17f87b4..4b09494 100644 --- a/inst/examples/ex-condense.R +++ b/inst/examples/ex-condense.R @@ -2,7 +2,7 @@ data("slides") ## Coerce to a compositional matrix -coda <- as_composition(slides) +coda <- as_composition(slides, groups = 2) ## Compositional mean by group -condense(coda, by = "slide") +condense(coda) diff --git a/inst/examples/ex-density.R b/inst/examples/ex-density.R index 50cd523..0eb211c 100644 --- a/inst/examples/ex-density.R +++ b/inst/examples/ex-density.R @@ -1,7 +1,7 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos) +coda <- as_composition(kommos, groups = 1) ## Log ratio clr <- transform_clr(coda) @@ -10,4 +10,4 @@ clr <- transform_clr(coda) plot(clr, groups = NULL, flip = TRUE) ## Use ceramic types for grouping -plot(clr, groups = "type", flip = TRUE) +plot(clr, flip = TRUE) diff --git a/inst/examples/ex-describe.R b/inst/examples/ex-describe.R index 088f4ea..a2090e7 100644 --- a/inst/examples/ex-describe.R +++ b/inst/examples/ex-describe.R @@ -2,7 +2,7 @@ data("slides") ## Coerce to compositional data -coda <- as_composition(slides) +coda <- as_composition(slides, groups = 2) ## Quick description describe(coda) diff --git a/inst/examples/ex-mutators.R b/inst/examples/ex-mutators.R index 2f4c785..8bb5118 100644 --- a/inst/examples/ex-mutators.R +++ b/inst/examples/ex-mutators.R @@ -3,5 +3,6 @@ data("slides") head(slides) ## Coerce to compositional data -coda <- as_composition(slides) -head(augment(coda)) +coda <- as_composition(slides, groups = 2) + +get_groups(coda) diff --git a/inst/examples/ex-outliers.R b/inst/examples/ex-outliers.R index 45aac82..e8bf81b 100644 --- a/inst/examples/ex-outliers.R +++ b/inst/examples/ex-outliers.R @@ -1,7 +1,7 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, parts = 3:17) +coda <- as_composition(kommos, parts = 3:17, groups = 1) ## Detect outliers out <- outliers(coda, groups = NULL, method = "mcd") @@ -12,7 +12,7 @@ plot(out, type = "qqplot") ## Detect outliers by group ## (use ceramic types for grouping) -out <- outliers(coda, groups = "type", method = "mcd") +out <- outliers(coda, method = "mcd") plot(out, type = "dotchart", select = 1, robust = FALSE) plot(out, type = "dotchart", select = 2, robust = FALSE) diff --git a/inst/examples/ex-pca.R b/inst/examples/ex-pca.R index fff8277..1c26f26 100644 --- a/inst/examples/ex-pca.R +++ b/inst/examples/ex-pca.R @@ -1,7 +1,7 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos) # Use ceramic types for grouping +coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping ## Centered log-ratio clr <- transform_clr(coda) @@ -10,5 +10,5 @@ clr <- transform_clr(coda) X <- pca(clr, scale = FALSE) ## Explore results -viz_individuals(X, highlight = "type", pch = 16) +viz_individuals(X, highlight = get_groups(coda), pch = 16) viz_variables(X) diff --git a/inst/examples/ex-plot.R b/inst/examples/ex-plot.R index b73e684..bfc39c6 100644 --- a/inst/examples/ex-plot.R +++ b/inst/examples/ex-plot.R @@ -1,10 +1,10 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, parts = 3:8) +coda <- as_composition(kommos, parts = 3:8, groups = 1) ## Use ceramic types for grouping -plot(coda, groups = "type") +plot(coda) ## Center and scale ternary plots plot(coda, groups = NULL, center = TRUE, scale = TRUE) diff --git a/inst/tinytest/_snaps/aggregate_group.rds b/inst/tinytest/_snaps/aggregate.rds similarity index 100% rename from inst/tinytest/_snaps/aggregate_group.rds rename to inst/tinytest/_snaps/aggregate.rds diff --git a/inst/tinytest/_snaps/augment_clr.rds b/inst/tinytest/_snaps/augment_clr.rds deleted file mode 100644 index bc3f813..0000000 Binary files a/inst/tinytest/_snaps/augment_clr.rds and /dev/null differ diff --git a/inst/tinytest/_snaps/augment_coda.rds b/inst/tinytest/_snaps/augment_coda.rds deleted file mode 100644 index 47e9621..0000000 Binary files a/inst/tinytest/_snaps/augment_coda.rds and /dev/null differ diff --git a/inst/tinytest/_snaps/coerce.rds b/inst/tinytest/_snaps/coerce.rds new file mode 100644 index 0000000..a8a188e Binary files /dev/null and b/inst/tinytest/_snaps/coerce.rds differ diff --git a/inst/tinytest/_snaps/condense.rds b/inst/tinytest/_snaps/condense.rds index 3283447..55f1bad 100644 Binary files a/inst/tinytest/_snaps/condense.rds and b/inst/tinytest/_snaps/condense.rds differ diff --git a/inst/tinytest/_snaps/margin.rds b/inst/tinytest/_snaps/margin.rds index 5078597..8d4b9b6 100644 Binary files a/inst/tinytest/_snaps/margin.rds and b/inst/tinytest/_snaps/margin.rds differ diff --git a/inst/tinytest/_snaps/missing_multiplicative.rds b/inst/tinytest/_snaps/missing_multiplicative.rds index 82bc622..450b8ef 100644 Binary files a/inst/tinytest/_snaps/missing_multiplicative.rds and b/inst/tinytest/_snaps/missing_multiplicative.rds differ diff --git a/inst/tinytest/_snaps/scale.rds b/inst/tinytest/_snaps/scale.rds index 892cc1c..2e415a9 100644 Binary files a/inst/tinytest/_snaps/scale.rds and b/inst/tinytest/_snaps/scale.rds differ diff --git a/inst/tinytest/_snaps/transform_alr.rds b/inst/tinytest/_snaps/transform_alr.rds index 7a0a8fb..11527b5 100644 Binary files a/inst/tinytest/_snaps/transform_alr.rds and b/inst/tinytest/_snaps/transform_alr.rds differ diff --git a/inst/tinytest/_snaps/transform_clr.rds b/inst/tinytest/_snaps/transform_clr.rds index 8f3a728..386d0af 100644 Binary files a/inst/tinytest/_snaps/transform_clr.rds and b/inst/tinytest/_snaps/transform_clr.rds differ diff --git a/inst/tinytest/_snaps/transform_ilr.rds b/inst/tinytest/_snaps/transform_ilr.rds index 7921c3e..0e7bef3 100644 Binary files a/inst/tinytest/_snaps/transform_ilr.rds and b/inst/tinytest/_snaps/transform_ilr.rds differ diff --git a/inst/tinytest/_snaps/transform_lr.rds b/inst/tinytest/_snaps/transform_lr.rds index 22c9011..451210d 100644 Binary files a/inst/tinytest/_snaps/transform_lr.rds and b/inst/tinytest/_snaps/transform_lr.rds differ diff --git a/inst/tinytest/_snaps/transform_plr.rds b/inst/tinytest/_snaps/transform_plr.rds index 1d7c3e9..15c2313 100644 Binary files a/inst/tinytest/_snaps/transform_plr.rds and b/inst/tinytest/_snaps/transform_plr.rds differ diff --git a/inst/tinytest/_snaps/zero_multiplicative.rds b/inst/tinytest/_snaps/zero_multiplicative.rds index 4f3db45..f8c189e 100644 Binary files a/inst/tinytest/_snaps/zero_multiplicative.rds and b/inst/tinytest/_snaps/zero_multiplicative.rds differ diff --git a/inst/tinytest/test_coerce.R b/inst/tinytest/test_coerce.R index f6d02bd..2016378 100644 --- a/inst/tinytest/test_coerce.R +++ b/inst/tinytest/test_coerce.R @@ -1,10 +1,7 @@ # Data with groups ============================================================= data("slides") -coda <- as_composition(slides) -expect_equal_to_reference(augment(coda), file = "_snaps/augment_coda.rds") - -clr <- transform_clr(coda, weights = FALSE) -expect_equal_to_reference(clr, file = "_snaps/augment_clr.rds") +coda <- as_composition(slides, group = 1) +expect_equal_to_reference(coda, file = "_snaps/coerce.rds") # Back transform to count ====================================================== data("hongite") diff --git a/inst/tinytest/test_condense.R b/inst/tinytest/test_condense.R index d4cb395..05c23b4 100644 --- a/inst/tinytest/test_condense.R +++ b/inst/tinytest/test_condense.R @@ -1,10 +1,8 @@ data("slides") -coda <- as_composition(slides) +coda <- as_composition(slides, groups = 2) ## Compositional mean by sample -by <- c("A", "A", "C", "D", "B", "E", "C", "B", "E", "D", "C", "E", - "B", "E", "C", "D", "B", "C", "A", "B", "A", "C", "B", "A", "E") -flat <- condense(coda, by = by) +flat <- condense(coda, by = get_groups(coda)) expect_equal_to_reference(as.data.frame(flat), file = "_snaps/condense.rds") ## With zeros diff --git a/inst/tinytest/test_mutators.R b/inst/tinytest/test_mutators.R index 6183c7a..f6ad894 100644 --- a/inst/tinytest/test_mutators.R +++ b/inst/tinytest/test_mutators.R @@ -1,6 +1,22 @@ +# CompositionMatrix groups ===================================================== data("hongite") coda <- as_composition(hongite) +expect_equal(get_groups(coda), rep(NA_character_, nrow(coda))) +expect_false(any_assigned(coda)) + +set_groups(coda) <- rep(c("A", "B", "C", "D", NA), each = 5) +expect_equal(get_groups(coda), rep(c("A", "B", "C", "D", NA), each = 5)) +expect_true(any_assigned(coda)) +expect_equal(is_assigned(coda), rep(c(TRUE, FALSE), c(20, 5))) + +set_groups(coda) <- NULL +expect_false(any_assigned(coda)) + +# Invalid values +# Try wrong length +expect_error(set_groups(coda) <- LETTERS, class = "arkhe_error_class") + # CompositionMatrix totals ===================================================== mtx <- matrix(sample(1:100, 75, TRUE), ncol = 5) coda <- as_composition(mtx) diff --git a/inst/tinytest/test_statistics.R b/inst/tinytest/test_statistics.R index 4c5505f..bbbb5b6 100644 --- a/inst/tinytest/test_statistics.R +++ b/inst/tinytest/test_statistics.R @@ -3,7 +3,7 @@ data("slides") petro <- as_composition(slides) expect_equal_to_reference(aggregate(petro, by = slides$analyst, FUN = mean), - file = "_snaps/aggregate_group.rds") + file = "_snaps/aggregate.rds") # Mean ========================================================================= expect_equal(nexus:::gmean(c(7.72, 0, 3.11, 7.19), zero.rm = FALSE), 0) diff --git a/man/CompositionMatrix-class.Rd b/man/CompositionMatrix-class.Rd index 2ded9b1..7764403 100644 --- a/man/CompositionMatrix-class.Rd +++ b/man/CompositionMatrix-class.Rd @@ -14,7 +14,7 @@ An S4 class to represent compositional data. \item{\code{totals}}{A \code{\link{numeric}} vector to store the absolute row sums (before the closure of the compositions).} -\item{\code{extra}}{A \code{\link{list}} of extra variables.} +\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} }} \note{ diff --git a/man/LogRatio-class.Rd b/man/LogRatio-class.Rd index e7939d0..be8a9df 100644 --- a/man/LogRatio-class.Rd +++ b/man/LogRatio-class.Rd @@ -24,7 +24,7 @@ S4 classes to represent log-ratio data transformations. \item{\code{totals}}{A \code{\link{numeric}} vector to store the absolute row sums (before the closure of the compositions).} -\item{\code{extra}}{A \code{\link{list}} of extra variables.} +\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} \item{\code{parts}}{A \code{\link{character}} vector to store the part names.} diff --git a/man/as_amounts.Rd b/man/as_amounts.Rd index 5ca89a9..755969d 100644 --- a/man/as_amounts.Rd +++ b/man/as_amounts.Rd @@ -41,8 +41,7 @@ head(X) } \seealso{ Other compositional data tools: -\code{\link{as_composition}()}, -\code{\link{augment}()} +\code{\link{as_composition}()} } \author{ N. Frerebeau diff --git a/man/as_composition.Rd b/man/as_composition.Rd index 9c93ce4..1f8988a 100644 --- a/man/as_composition.Rd +++ b/man/as_composition.Rd @@ -15,7 +15,12 @@ as_composition(from, ...) \S4method{as_composition}{matrix}(from) -\S4method{as_composition}{data.frame}(from, parts = NULL, verbose = getOption("nexus.verbose")) +\S4method{as_composition}{data.frame}( + from, + parts = NULL, + groups = NULL, + verbose = getOption("nexus.verbose") +) } \arguments{ \item{from}{A \code{\link{matrix}} or \code{\link{data.frame}} to be coerced.} @@ -26,6 +31,9 @@ as_composition(from, ...) compositional parts. If \code{NULL} (the default), all \code{\link{double}} columns will be used.} +\item{groups}{An \code{\link{integer}} giving the index of the column to be used to +group the samples. If \code{NULL} (the default), no grouping is stored.} + \item{verbose}{A \code{\link{logical}} scalar: should \R report extra information on progress?} } @@ -57,8 +65,7 @@ head(X) } \seealso{ Other compositional data tools: -\code{\link{as_amounts}()}, -\code{\link{augment}()} +\code{\link{as_amounts}()} } \author{ N. Frerebeau diff --git a/man/augment.Rd b/man/augment.Rd deleted file mode 100644 index accf0a8..0000000 --- a/man/augment.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/coerce.R -\docType{methods} -\name{augment} -\alias{augment} -\alias{augment,CompositionMatrix-method} -\alias{augment,LogRatio-method} -\title{Augment Data with Extra Columns} -\usage{ -\S4method{augment}{CompositionMatrix}(x, ...) - -\S4method{augment}{LogRatio}(x, ...) -} -\arguments{ -\item{x}{A \code{\linkS4class{CompositionMatrix}} or \code{\linkS4class{LogRatio}} object.} - -\item{...}{Currently not used.} -} -\value{ -A \code{\link{data.frame}}. -} -\description{ -Adds columns from the original data. -} -\examples{ -## Data from Aitchison 1986 -data("arctic") - -## Coerce to compositional data -coda <- as_composition(arctic, parts = 1:3) - -## Isometric log-ratio -ilr <- transform_ilr(coda) - -## Add extra column -augment(ilr) -} -\seealso{ -Other compositional data tools: -\code{\link{as_amounts}()}, -\code{\link{as_composition}()} -} -\author{ -N. Frerebeau -} -\concept{compositional data tools} diff --git a/man/barplot.Rd b/man/barplot.Rd index 77c6a15..d6ed9c4 100644 --- a/man/barplot.Rd +++ b/man/barplot.Rd @@ -9,7 +9,7 @@ \S4method{barplot}{CompositionMatrix}( height, ..., - groups = NULL, + groups = get_groups(height), order = NULL, decreasing = FALSE, horiz = TRUE, @@ -29,9 +29,7 @@ \item{...}{Further parameters to be passed to \code{\link[graphics:barplot]{graphics::barplot()}}.} \item{groups}{A \code{vector} of grouping elements, as long as the variables in -\code{height}. If a single \code{character} string is passed, it must be the name of a -categorical variable from the original dataset. -If set, a matrix of panels defined by \code{groups} will be drawn.} +\code{height}. If set, a matrix of panels defined by \code{groups} will be drawn.} \item{order}{An \code{\link{integer}} vector giving the index of the column to be used for the ordering of the data.} @@ -80,11 +78,11 @@ barplot(coda, order = 2) ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos) +coda <- as_composition(kommos, groups = 1) ## Use ceramic types for grouping -barplot(coda, groups = "type", order = 1) -barplot(coda, groups = "type", order = 1, horiz = FALSE) +barplot(coda, order = 1) +barplot(coda, order = 1, horiz = FALSE) } \seealso{ Other plot methods: diff --git a/man/condense.Rd b/man/condense.Rd index c6c31bc..ab9ec32 100644 --- a/man/condense.Rd +++ b/man/condense.Rd @@ -9,7 +9,7 @@ \usage{ condense(x, ...) -\S4method{condense}{CompositionMatrix}(x, by, ...) +\S4method{condense}{CompositionMatrix}(x, by = get_groups(x), ...) } \arguments{ \item{x}{A \code{\linkS4class{CompositionMatrix}} object.} @@ -17,9 +17,7 @@ condense(x, ...) \item{...}{Further arguments to be passed to \code{\link[=mean]{mean()}}.} \item{by}{A \code{vector} of grouping elements, as long as the variables in \code{x} -(in the sense that \code{\link[=as.factor]{as.factor(by)}} defines the grouping). -If a single \code{character} string is passed, it must be the name of a -categorical variable from the original dataset.} +(in the sense that \code{\link[=as.factor]{as.factor(by)}} defines the grouping).} } \value{ A \code{\linkS4class{CompositionMatrix}} object. @@ -32,10 +30,10 @@ Splits the data into subsets and computes compositional mean for each. data("slides") ## Coerce to a compositional matrix -coda <- as_composition(slides) +coda <- as_composition(slides, groups = 2) ## Compositional mean by group -condense(coda, by = "slide") +condense(coda) } \seealso{ \code{\link[=mean]{mean()}}, \code{\link[=aggregate]{aggregate()}} diff --git a/man/describe.Rd b/man/describe.Rd index a0ae26c..5646069 100644 --- a/man/describe.Rd +++ b/man/describe.Rd @@ -22,7 +22,7 @@ Describes an object. data("slides") ## Coerce to compositional data -coda <- as_composition(slides) +coda <- as_composition(slides, groups = 2) ## Quick description describe(coda) diff --git a/man/groups.Rd b/man/groups.Rd index 582f540..878dfaf 100644 --- a/man/groups.Rd +++ b/man/groups.Rd @@ -3,21 +3,63 @@ \docType{methods} \name{groups} \alias{groups} +\alias{any_assigned} +\alias{any_assigned-method} +\alias{is_assigned} +\alias{is_assigned-method} \alias{get_groups} \alias{get_groups-method} +\alias{set_groups<-} +\alias{set_groups-method} +\alias{is_assigned,CompositionMatrix-method} +\alias{is_assigned,LogRatio-method} +\alias{any_assigned,CompositionMatrix-method} +\alias{any_assigned,LogRatio-method} +\alias{get_groups,CompositionMatrix-method} +\alias{get_groups,LogRatio-method} \alias{get_groups,OutlierIndex-method} +\alias{set_groups<-,CompositionMatrix-method} +\alias{set_groups,CompositionMatrix-method} \title{Working With Groups} \usage{ +any_assigned(x) + +is_assigned(x) + get_groups(x) +set_groups(x) <- value + +\S4method{is_assigned}{CompositionMatrix}(x) + +\S4method{is_assigned}{LogRatio}(x) + +\S4method{any_assigned}{CompositionMatrix}(x) + +\S4method{any_assigned}{LogRatio}(x) + +\S4method{get_groups}{CompositionMatrix}(x) + +\S4method{get_groups}{LogRatio}(x) + \S4method{get_groups}{OutlierIndex}(x) + +\S4method{set_groups}{CompositionMatrix}(x) <- value } \arguments{ \item{x}{An object from which to get or set \code{groups}.} + +\item{value}{A possible value for the \code{groups} of \code{x}.} } \value{ \itemize{ +\item \code{set_groups()} returns an object of the same sort as \code{x} with the new +group names assigned. \item \code{get_groups()} returns a \code{\link{character}} vector giving the group names of \code{x}. +\item \code{any_assigned()} returns a \code{\link{logical}} scalar specifying whether or not \code{x} +has groups. +\item \code{is_assigned()} returns a \code{\link{logical}} vector specifying whether or not an +observation belongs to a group. } } \description{ @@ -29,8 +71,9 @@ data("slides") head(slides) ## Coerce to compositional data -coda <- as_composition(slides) -head(augment(coda)) +coda <- as_composition(slides, groups = 2) + +get_groups(coda) } \seealso{ Other mutators: diff --git a/man/outliers.Rd b/man/outliers.Rd index cff7d42..73e1d34 100644 --- a/man/outliers.Rd +++ b/man/outliers.Rd @@ -12,7 +12,7 @@ outliers(object, ...) \S4method{outliers}{CompositionMatrix}( object, ..., - groups = NULL, + groups = get_groups(object), method = c("mve", "mcd"), quantile = 0.975 ) @@ -24,8 +24,7 @@ outliers(object, ...) Only used if \code{robust} is \code{TRUE}.} \item{groups}{A \code{vector} of grouping elements, as long as the variables in -\code{object}. If a single \code{character} string is passed, it must be the name of a -categorical variable from the original dataset.} +\code{object}.} \item{method}{A \code{\link{character}} string specifying the method to be used. It must be one of "\code{mve}" (minimum volume ellipsoid) or "\code{mcd}" (minimum @@ -62,7 +61,7 @@ why a particular threshold should be applicable to all data sets ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, parts = 3:17) +coda <- as_composition(kommos, parts = 3:17, groups = 1) ## Detect outliers out <- outliers(coda, groups = NULL, method = "mcd") @@ -73,7 +72,7 @@ plot(out, type = "qqplot") ## Detect outliers by group ## (use ceramic types for grouping) -out <- outliers(coda, groups = "type", method = "mcd") +out <- outliers(coda, method = "mcd") plot(out, type = "dotchart", select = 1, robust = FALSE) plot(out, type = "dotchart", select = 2, robust = FALSE) diff --git a/man/pca.Rd b/man/pca.Rd index 1f98ccc..3a58d5a 100644 --- a/man/pca.Rd +++ b/man/pca.Rd @@ -65,7 +65,7 @@ decomposition. ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos) # Use ceramic types for grouping +coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping ## Centered log-ratio clr <- transform_clr(coda) @@ -74,7 +74,7 @@ clr <- transform_clr(coda) X <- pca(clr, scale = FALSE) ## Explore results -viz_individuals(X, highlight = "type", pch = 16) +viz_individuals(X, highlight = get_groups(coda), pch = 16) viz_variables(X) } \references{ diff --git a/man/plot.Rd b/man/plot.Rd index b3f175e..e95eb10 100644 --- a/man/plot.Rd +++ b/man/plot.Rd @@ -10,7 +10,7 @@ x, ..., margin = NULL, - groups = NULL, + groups = get_groups(x), palette_color = palette_color_discrete(), palette_symbol = palette_shape() ) @@ -26,8 +26,7 @@ the column to be used as the third part of the ternary plots. If \code{NULL} of the non-selected parts).} \item{groups}{A \code{vector} of grouping elements, as long as the variables in -\code{x}. If a single \code{character} string is passed, it must be the name of a -categorical variable from the original dataset.} +\code{x}.} \item{palette_color}{A palette \code{\link{function}} that when called with a single argument (\code{groups}) returns a \code{character} vector of colors.} @@ -46,10 +45,10 @@ Displays a matrix of ternary plots. ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, parts = 3:8) +coda <- as_composition(kommos, parts = 3:8, groups = 1) ## Use ceramic types for grouping -plot(coda, groups = "type") +plot(coda) ## Center and scale ternary plots plot(coda, groups = NULL, center = TRUE, scale = TRUE) diff --git a/man/plot_logratio.Rd b/man/plot_logratio.Rd index 268a5b9..2856c7d 100644 --- a/man/plot_logratio.Rd +++ b/man/plot_logratio.Rd @@ -9,7 +9,7 @@ \S4method{plot}{LogRatio,missing}( x, ..., - groups = NULL, + groups = get_groups(x), palette_color = palette_color_discrete(), rug = TRUE, ticksize = 0.05, @@ -31,9 +31,7 @@ \code{border} and \code{col}.} \item{groups}{A \code{vector} of grouping elements, as long as the variables in -\code{x}. If a single \code{character} string is passed, it must be the name of a -categorical variable from the original dataset. -If set, a matrix of panels defined by \code{groups} will be drawn.} +\code{x}. If set, a matrix of panels defined by \code{groups} will be drawn.} \item{palette_color}{A palette \code{\link{function}} that when called with a single argument (\code{groups}) returns a \code{character} vector of colors.} @@ -78,7 +76,7 @@ Displays a density plot. ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos) +coda <- as_composition(kommos, groups = 1) ## Log ratio clr <- transform_clr(coda) @@ -87,7 +85,7 @@ clr <- transform_clr(coda) plot(clr, groups = NULL, flip = TRUE) ## Use ceramic types for grouping -plot(clr, groups = "type", flip = TRUE) +plot(clr, flip = TRUE) } \seealso{ Other plot methods: diff --git a/man/plot_outliers.Rd b/man/plot_outliers.Rd index 6a6243a..8e1d713 100644 --- a/man/plot_outliers.Rd +++ b/man/plot_outliers.Rd @@ -88,7 +88,7 @@ Plot Outliers ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, parts = 3:17) +coda <- as_composition(kommos, parts = 3:17, groups = 1) ## Detect outliers out <- outliers(coda, groups = NULL, method = "mcd") @@ -99,7 +99,7 @@ plot(out, type = "qqplot") ## Detect outliers by group ## (use ceramic types for grouping) -out <- outliers(coda, groups = "type", method = "mcd") +out <- outliers(coda, method = "mcd") plot(out, type = "dotchart", select = 1, robust = FALSE) plot(out, type = "dotchart", select = 2, robust = FALSE) diff --git a/man/reexports.Rd b/man/reexports.Rd index 6a272f6..2188655 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -20,7 +20,7 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{arkhe}{\code{\link[arkhe]{remove_NA}}, \code{\link[arkhe]{remove_constant}}, \code{\link[arkhe]{remove_zero}}, \code{\link[arkhe]{sparsity}}} + \item{arkhe}{\code{\link[arkhe]{remove_constant}}, \code{\link[arkhe]{remove_NA}}, \code{\link[arkhe]{remove_zero}}, \code{\link[arkhe]{sparsity}}} \item{khroma}{\code{\link[khroma]{palette_color_continuous}}, \code{\link[khroma]{palette_color_discrete}}, \code{\link[khroma]{palette_color_picker}}, \code{\link[khroma:palette_shape]{palette_line}}, \code{\link[khroma]{palette_shape}}, \code{\link[khroma]{palette_size_range}}} }} diff --git a/vignettes/nexus.Rmd b/vignettes/nexus.Rmd index 9869b3c..8a28b9c 100644 --- a/vignettes/nexus.Rmd +++ b/vignettes/nexus.Rmd @@ -28,7 +28,7 @@ Provenance studies rely on the identification of probable sources, such that the # Get started ```{r setup} -## Install extra packages (if needed): +## Install extra packages (if needed) # install.packages("folio") library(nexus) @@ -62,11 +62,20 @@ counts <- as_amounts(coda) all.equal(hongite, as.data.frame(counts)) ``` +The `parts` argument of the function `as_composition()` is used to define the columns to be used as the compositional part. If `parts` is `NULL` (the default), all non-integer numeric columns (i.e. of type `double`) are used. In the case of a `data.frame` coercion, additional columns are removed. +```{r} +## Create a data.frame +X <- data.frame( + type = c("A", "A", "B", "A", "B", "C", "C", "C", "B"), + Ca = c(7.72, 7.32, 3.11, 7.19, 7.41, 5, 4.18, 1, 4.51), + Fe = c(6.12, 5.88, 5.12, 6.18, 6.02, 7.14, 5.25, 5.28, 5.72), + Na = c(0.97, 1.59, 1.25, 0.86, 0.76, 0.51, 0.75, 0.52, 0.56) +) -```{r plot, fig.width=7, fig.height=5, out.width='100%'} -## Ternary plots -plot(coda) +## Coerce to a compositional matrix +## (the 'type' column will be removed) +Y <- as_composition(X) ``` # Working with (reference) groups @@ -74,31 +83,35 @@ plot(coda) Provenance studies typically rely on two approaches, which can be used together: * Identification of groups among the artifacts being studied, based on mineralogical or geochemical criteria (*clustering*). -* Comparison with so-called reference groups, i.e. known geological sources or productive contexts (*classification*). +* Comparison with so-called reference groups, i.e. known geological sources or archaeological contexts (*classification*). -**nexus** allows to specify whether an observation belongs to a specific group (or not): +When coercing a `data.frame` to a `CompositionMatrix` object, **nexus** allows to specify whether an observation belongs to a specific group (or not): -The `parts` argument of the function `as_composition()` is used to define the columns to be used as the compositional part. If `parts` is `NULL` (the default), all non-integer numeric columns (i.e. of type `double`) are used. In the case of a `data.frame` coercion, additional columns are stored internally. This preserves the information contained in these additional variables (whether qualitative or quantitative). +```{r} +## Coerce to a compositional matrix +## Use the first column for grouping +Y <- as_composition(X, groups = 1) + +any_assigned(Y) +is_assigned(Y) +``` + +`get_groups(x)` and `set_groups(x) <- value` allow to retrieve or set groups of an existing `CompositionMatrix`. Missing values (`NA`) can be used to specify that a sample does not belong to any group): ```{r} -## Create a data.frame -X <- data.frame( - type = c("A", "A", "B", "A", "B", "C", "C", "C", "B"), - Ca = c(7.72, 7.32, 3.11, 7.19, 7.41, 5, 4.18, 1, 4.51), - Fe = c(6.12, 5.88, 5.12, 6.18, 6.02, 7.14, 5.25, 5.28, 5.72), - Na = c(0.97, 1.59, 1.25, 0.86, 0.76, 0.51, 0.75, 0.52, 0.56) -) +## Set groups (NA means no group) +set_groups(Y) <- c("X", "X", "Y", "X", "Y", NA, NA, NA, "Y") +is_assigned(Y) -## Coerce to a compositional matrix -## (the 'type' column will be preserved) -Y <- as_composition(X) +## Retrieve groups +get_groups(Y) ``` -These additional variables can be used by further methods (e.g. through the `groups` argument of the plotting functions): +Once groups have been defined, they can be used by further methods (e.g. plotting). ```{r barplot, fig.width=7, fig.height=5, out.width='100%'} ## Compositional bar plot -barplot(Y, groups = "type", order = "Ca") +plot(Y) ``` # Log-ratio transformations @@ -120,20 +133,21 @@ all.equal(back, coda) ```{r ceramics} ## Data from Day et al. 2011 data("kommos", package = "folio") +describe(kommos) ## Remove rows with missing values kommos <- remove_NA(kommos, margin = 1) ## Coerce to a compositional matrix -coda <- as_composition(kommos, parts = 3:22) +coda <- as_composition(kommos, parts = 3:22, groups = "type") ``` ```{r ceramics-barplot, fig.width=7, fig.height=7, out.width='100%'} ## Compositional bar plot -barplot(coda, groups = "type", order = "Ca") +barplot(coda, order = "Ca") ``` -## Principle Component Analysis +## Principal Components Analysis ```{r pca, fig.width=7, fig.height=7, out.width='50%', fig.show='hold'} ## CLR @@ -145,7 +159,7 @@ clr_pca <- pca(clr, scale = FALSE) ## Visualize results viz_individuals( x = clr_pca, - highlight = "type", + highlight = get_groups(clr), pch = 16, col = c("#EE7733", "#0077BB", "#33BBEE", "#EE3377") ) @@ -154,11 +168,13 @@ viz_variables(clr_pca) ## MANOVA +The log-transformed data can be assigned to a new column, allowing us to keep working with the data in the context of the original `data.frame`: + ```{r manova} ## ILR ilr <- transform_ilr(coda) -kommos$ilr <- ilr +kommos$ilr <- ilr ## MANOVA fit <- manova(ilr ~ type, data = kommos) @@ -167,7 +183,7 @@ summary(fit) The MANOVA results suggest that there are statistically significant differences between groups. -## Discriminant Analysis +## Linear Discriminant Analysis ```{r lda, fig.width=7, fig.height=7, out.width='100%'} ## LDA