diff --git a/DESCRIPTION b/DESCRIPTION index a3d8892..7ed56e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ Depends: R (>= 3.5), dimensio (>= 0.8.0) Imports: - arkhe (>= 1.6.0), + arkhe (>= 1.7.0), graphics, grDevices, isopleuros (>= 1.2.0), diff --git a/NAMESPACE b/NAMESPACE index a1262d5..4f66e39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,12 +26,8 @@ exportMethods("%power%") exportMethods("[") exportMethods("[<-") exportMethods("[[<-") -exportMethods("set_groups<-") -exportMethods("set_samples<-") exportMethods("set_totals<-") exportMethods(aggregate) -exportMethods(any_assigned) -exportMethods(any_replicated) exportMethods(as_amounts) exportMethods(as_composition) exportMethods(as_features) @@ -43,11 +39,8 @@ exportMethods(covariance) exportMethods(describe) exportMethods(dist) exportMethods(get_groups) -exportMethods(get_samples) exportMethods(get_totals) exportMethods(hist) -exportMethods(is_assigned) -exportMethods(is_replicated) exportMethods(mahalanobis) exportMethods(margin) exportMethods(mean) diff --git a/R/AllClasses.R b/R/AllClasses.R index 8409771..7a06384 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -46,9 +46,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' An S4 class to represent compositional data. #' @slot totals A [`numeric`] vector to store the absolute row sums (before #' the closure of the compositions). -#' @slot samples A [`character`] vector to store the sample identifiers -#' (allows duplicates in case of repeated measurements). -#' @slot groups A [`character`] vector to store the group names. +#' @slot extra A [`list`] of extra variables. #' @section Coerce: #' In the code snippets below, `x` is a `CompositionMatrix` object. #' \describe{ @@ -72,8 +70,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "CompositionMatrix", slots = c( totals = "numeric", - samples = "character", - groups = "character" + extra = "list" ), contains = c("NumericMatrix") ) @@ -84,9 +81,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) #' S4 classes to represent log-ratio data transformations. #' @slot totals A [`numeric`] vector to store the absolute row sums (before #' the closure of the compositions). -#' @slot samples A [`character`] vector to store the sample identifiers -#' (allows duplicates in case of repeated measurements). -#' @slot groups A [`character`] vector to store the group names. +#' @slot extra A [`list`] of extra variables. #' @slot parts A [`character`] vector to store the part names. #' @slot ratio A [`character`] vector to store the ratio names. #' @slot order An [`integer`] vector to store the original ordering of the @@ -112,8 +107,7 @@ setClassUnion("index", members = c("logical", "numeric", "character")) Class = "LogRatio", slots = c( totals = "numeric", - samples = "character", - groups = "character", + extra = "list", parts = "character", ratio = "character", diff --git a/R/AllGenerics.R b/R/AllGenerics.R index dce0d88..f5266c6 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -18,31 +18,14 @@ NULL #' #' Coerces an object to a `CompositionMatrix` object. #' @param from A [`matrix`] or [`data.frame`] to be coerced. -#' @param samples An [`integer`] giving the index of the column to be used for -#' sample identification: allows duplicates to identify replicated measurements. -#' If `NULL` (the default), row names will be used as sample IDs. -#' @param groups An [`integer`] giving the index of the column to be used to -#' group the samples. If `NULL` (the default), no grouping is stored. -#' @param auto A [`logical`] scalar: try to automatically detect `codes`, -#' `samples` and `groups` columns? +#' @param parts A `vector` giving the index of the column to be used a +#' compositional parts. If `NULL` (the default), all [`double`] columns will be +#' used. #' @param verbose A [`logical`] scalar: should \R report extra information #' on progress? #' @param ... Currently not used. #' @details -#' The [`CompositionMatrix-class`] class has special slots: -#' -#' * `samples` for [repeated measurements/observation][samples], -#' * `groups` to [group data by site/area][group]. -#' -#' When coercing a `data.frame` to a [`CompositionMatrix-class`] object, an -#' attempt is made to automatically assign values to these slots by mapping -#' column names (case insensitive, plural insensitive). This behavior can be -#' disabled by setting `options(nexus.autodetect = FALSE)` or overridden by -#' explicitly specifying the columns to be used. -#' #' See `vignette("nexus")`. -#' @note -#' All non-numeric variable will be removed. #' @return A [`CompositionMatrix-class`] object. #' @example inst/examples/ex-coerce.R #' @author N. Frerebeau @@ -77,7 +60,7 @@ setGeneric( #' @param from A [`CompositionMatrix-class`] object. #' @param ... Currently not used. #' @return -#' A [`data.frame`] with all informations as extra columns. +#' A [`data.frame`]. #' @example inst/examples/ex-coerce.R #' @author N. Frerebeau #' @docType methods @@ -112,7 +95,7 @@ NULL #' \describe{ #' \item{`%perturbe%`}{[Perturbation operation][perturbation()].} #' \item{`%power%`}{[Powering operation][powering()].} -#' } +#' } #' @return #' A [`CompositionMatrix-class`] object or a [`numeric`] vector (same as `x`). #' @example inst/examples/ex-arith.R @@ -220,17 +203,9 @@ setGeneric( #' #' Retrieves or defines the groups to which the observations belong. #' @param x An object from which to get or set `groups`. -#' @param value A possible value for the `groups` of `x`. -#' @details -#' See `vignette("nexus")`. +# @param value A possible value for the `groups` of `x`. #' @return -#' * `set_groups()` returns an object of the same sort as `x` with the new -#' group names assigned. #' * `get_groups()` returns a [`character`] vector giving the group names of `x`. -#' * `any_assigned()` returns a [`logical`] scalar specifying whether or not `x` -#' has groups. -#' * `is_assigned()` returns a [`logical`] vector specifying whether or not an -#' observation belongs to a group. #' @example inst/examples/ex-mutators.R #' @author N. Frerebeau #' @docType methods @@ -239,20 +214,6 @@ setGeneric( #' @rdname groups NULL -#' @rdname groups -#' @aliases any_assigned-method -setGeneric( - name = "any_assigned", - def = function(x) standardGeneric("any_assigned") -) - -#' @rdname groups -#' @aliases is_assigned-method -setGeneric( - name = "is_assigned", - def = function(x) standardGeneric("is_assigned") -) - #' @rdname groups #' @aliases get_groups-method setGeneric( @@ -260,69 +221,6 @@ setGeneric( def = function(x) standardGeneric("get_groups") ) -#' @rdname groups -#' @aliases set_groups-method -setGeneric( - name = "set_groups<-", - def = function(x, value) standardGeneric("set_groups<-") -) - -#' Working With Samples -#' -#' Retrieves or defines the sample names. -#' @param x An object from which to get or set `samples`. -#' @param value A possible value for the `samples` of `x`. -#' @details -#' In some situations, measurements may have been repeated (e.g. multiple -#' chemical analyses on the same sample). The presence of repeated -#' measurements can be specified by giving several observations the same -#' sample name. -#' -#' See `vignette("nexus")`. -#' @return -#' * `set_samples()` returns an object of the same sort as `x` with the new -#' sample names assigned. -#' * `get_samples()` returns a [`character`] vector giving the sample names of `x`. -#' * `any_replicated()` returns a [`logical`] scalar specifying whether or not -#' `x` has replicated observations. -#' * `is_replicated()` returns a [`logical`] vector specifying whether or not -#' an observation is a replicate. -#' @example inst/examples/ex-mutators.R -#' @author N. Frerebeau -#' @docType methods -#' @family mutators -#' @name samples -#' @rdname samples -NULL - -#' @rdname samples -#' @aliases any_replicated-method -setGeneric( - name = "any_replicated", - def = function(x) standardGeneric("any_replicated") -) - -#' @rdname samples -#' @aliases is_replicated-method -setGeneric( - name = "is_replicated", - def = function(x) standardGeneric("is_replicated") -) - -#' @rdname samples -#' @aliases get_samples-method -setGeneric( - name = "get_samples", - def = function(x) standardGeneric("get_samples") -) - -#' @rdname samples -#' @aliases set_samples-method -setGeneric( - name = "set_samples<-", - def = function(x, value) standardGeneric("set_samples<-") -) - #' Row Sums #' #' Retrieves or defines the row sums (before closure). @@ -662,7 +560,8 @@ setGeneric( #' returns the result. #' @param x A [`CompositionMatrix-class`] object. #' @param by A `vector` or a list of grouping elements, each as long as the -#' variables in `x`. The elements are coerced to factors before use. +#' variables in `x`. The elements are coerced to factors before use +#' (in the sense that [`as.factor(by)`][as.factor()] defines the grouping). #' @param FUN A [`function`] to compute the summary statistics. #' @param simplify A [`logical`] scalar: should the results be simplified to a #' matrix if possible? @@ -721,7 +620,10 @@ NULL #' #' Splits the data into subsets and computes compositional mean for each. #' @param x A [`CompositionMatrix-class`] object. -#' @param by A `vector` of grouping elements, as long as the variables in `x`. +#' @param by A `vector` of grouping elements, as long as the variables in `x` +#' (in the sense that [`as.factor(by)`][as.factor()] defines the grouping). +#' If a single `character` string is passed, it must be the name of a +#' categorical variable from the original dataset. #' @param ... Further arguments to be passed to [mean()]. #' @return A [`CompositionMatrix-class`] object. #' @seealso [mean()], [aggregate()] @@ -979,12 +881,14 @@ NULL #' #' Displays a compositional bar chart. #' @param height A [`CompositionMatrix-class`] object. +#' @param groups A `vector` of grouping elements, as long as the variables in +#' `height`. If a single `character` string is passed, it must be the name of a +#' categorical variable from the original dataset. +#' If set, a matrix of panels defined by `groups` will be drawn. #' @param order An [`integer`] vector giving the index of the column to be used #' for the ordering of the data. #' @param decreasing A [`logical`] scalar: should the sort order be increasing #' or decreasing? -#' @param groups A `vector` of grouping elements, as long as the variables in -#' `height`. If set, a matrix of panels defined by `groups` will be drawn. #' @param horiz A [`logical`] scalar. If `FALSE`, the bars are drawn vertically #' with the first bar to the left. If `TRUE` (the default), the bars are drawn #' horizontally with the first at the bottom. @@ -1051,7 +955,9 @@ NULL #' #' Displays a matrix of ternary plots. #' @param x A [`CompositionMatrix-class`] object. -#' @param groups A `vector` of grouping elements, as long as the variables in `x`. +#' @param groups A `vector` of grouping elements, as long as the variables in +#' `x`. If a single `character` string is passed, it must be the name of a +#' categorical variable from the original dataset. #' @inheritParams isopleuros::ternary_pairs #' @return #' `plot()` is called for its side-effects: is results in a graphic being @@ -1070,7 +976,9 @@ NULL #' #' Displays a density plot. #' @param x A [`LogRatio-class`] object. -#' @param groups A `vector` of grouping elements, as long as the variables in `x`. +#' @param groups A `vector` of grouping elements, as long as the variables in +#' `x`. If a single `character` string is passed, it must be the name of a +#' categorical variable from the original dataset. #' If set, a matrix of panels defined by `groups` will be drawn. #' @param rug A [`logical`] scalar: should a *rug* representation (1-d plot) of #' the data be added to the plot? @@ -1245,7 +1153,9 @@ NULL #' `quantile` is used as a cut-off value for outlier detection: observations #' with larger (squared) Mahalanobis distance are considered as potential #' outliers. -#' @param groups A `vector` of grouping elements, as long as the variables in `object`. +#' @param groups A `vector` of grouping elements, as long as the variables in +#' `object`. If a single `character` string is passed, it must be the name of a +#' categorical variable from the original dataset. #' @details #' An outlier can be defined as having a very large Mahalanobis distance from #' all observations. In this way, a certain proportion of the observations can diff --git a/R/barplot.R b/R/barplot.R index 43df499..b8cdbf8 100644 --- a/R/barplot.R +++ b/R/barplot.R @@ -5,9 +5,9 @@ NULL # CompositionMatrix ============================================================ #' @export #' @method barplot CompositionMatrix -barplot.CompositionMatrix <- function(height, ..., +barplot.CompositionMatrix <- function(height, ..., groups = NULL, order = NULL, decreasing = FALSE, - groups = get_groups(height), horiz = TRUE, + horiz = TRUE, xlab = NULL, ylab = NULL, main = NULL, sub = NULL, ann = graphics::par("ann"), axes = TRUE, @@ -36,8 +36,8 @@ barplot.CompositionMatrix <- function(height, ..., y_side <- if (horiz) 2 else 1 ## Grouping - n <- 0 - if (!all(is.na(groups))) { + groups <- get_variable(height, which = groups) + if (!is.null(groups) && !all(is.na(groups))) { arkhe::assert_length(groups, nrow(z)) groups <- groups[ordering] diff --git a/R/coerce.R b/R/coerce.R index 8fdde87..5a9d240 100644 --- a/R/coerce.R +++ b/R/coerce.R @@ -22,17 +22,16 @@ setMethod( f = "as_composition", signature = c(from = "matrix"), definition = function(from) { + ## Make row/column names + lab <- make_names(x = NULL, n = nrow(from), prefix = "S") + rownames(from) <- if (has_rownames(from)) rownames(from) else lab + colnames(from) <- make_names(x = colnames(from), n = ncol(from), prefix = "V") + + ## Close totals <- rowSums(from, na.rm = TRUE) from <- from / totals - spl <- make_names(x = NULL, n = nrow(from), prefix = "S") - lab <- if (has_rownames(from)) rownames(from) else make_codes(spl) - grp <- rep(NA_character_, nrow(from)) - - rownames(from) <- lab - colnames(from) <- make_names(x = colnames(from), n = ncol(from), prefix = "V") - - .CompositionMatrix(from, totals = totals, samples = spl, groups = grp) + .CompositionMatrix(from, totals = unname(totals)) } ) @@ -42,78 +41,40 @@ setMethod( setMethod( f = "as_composition", signature = c(from = "data.frame"), - definition = function(from, samples = NULL, groups = NULL, - auto = getOption("nexus.autodetect"), + definition = function(from, parts = NULL, verbose = getOption("nexus.verbose")) { - - cols <- colnames(from) - empty <- rep(NA_character_, nrow(from)) - - index <- function(what, where) { - grep(what, where, ignore.case = TRUE, value = FALSE) - } - - ## Sample names - spl <- make_names(x = NULL, n = nrow(from), prefix = "S") - if (is.null(samples) && auto) samples <- index("^sample[s]{0,1}$", cols) - if (length(samples) == 1) { - if (is.character(samples)) samples <- match(samples, cols) - spl <- as.character(from[[samples]]) - } - - ## Identifiers (must be unique) - lab <- if (has_rownames(from)) rownames(from) else make_codes(spl) - - ## Group names - grp <- empty - if (is.null(groups) && auto) groups <- index("^group[s]{0,1}$", cols) - if (length(groups) == 1) { - if (is.character(groups)) groups <- match(groups, cols) - grp <- as.character(from[[groups]]) - grp[grp == ""] <- NA_character_ - } - - ## Drop extra columns (if any) - drop <- c(samples, groups) - data <- if (length(drop) > 0) from[, -drop, drop = FALSE] else from + ## Clean row/column names + lab <- make_names(x = NULL, n = nrow(from), prefix = "S") + rownames(from) <- if (has_rownames(from)) rownames(from) else lab + colnames(from) <- make_names(x = colnames(from), n = ncol(from), prefix = "V") ## Remove non-numeric columns - data <- arkhe::keep_cols(x = data, f = is.numeric, all = FALSE, - verbose = verbose) - arkhe::assert_filled(data) + if (is.null(parts)) { + parts <- arkhe::detect(from, f = is.double, margin = 2) # Logical + if (verbose) { + n <- sum(parts) + what <- ngettext(n, "part", "parts") + cols <- paste0(colnames(from)[parts], collapse = ", ") + msg <- "Found %g %s (%s)." + message(sprintf(msg, n, what, cols)) + } + } else { + if (is.numeric(parts)) parts <- seq_len(ncol(from)) %in% parts + if (is.character(parts)) parts <- colnames(from) %in% parts + } + coda <- from[, parts, drop = FALSE] + extra <- from[, !parts, drop = FALSE] + arkhe::assert_filled(coda) ## Build matrix - data <- data.matrix(data, rownames.force = NA) - totals <- rowSums(data, na.rm = TRUE) - data <- data / totals - rownames(data) <- lab + coda <- data.matrix(coda, rownames.force = NA) + totals <- rowSums(coda, na.rm = TRUE) + coda <- coda / totals - .CompositionMatrix(data, totals = totals, samples = spl, groups = grp) + .CompositionMatrix(coda, totals = unname(totals), extra = as.list(extra)) } ) -make_codes <- function(x) { - if (!any(duplicated(x))) return(x) - x <- split(x = seq_along(x), f = x) - nm <- rep(names(x), lengths(x)) - nm <- tapply( - X = nm, - INDEX = nm, - FUN = function(x) paste(x, seq_along(x), sep = "_"), - simplify = FALSE - ) - - x <- unlist(x, use.names = FALSE) - nm <- unlist(nm, use.names = FALSE) - nm[x] -} - -make_names <- function(x, n = length(x), prefix = "X") { - x <- if (n > 0) x %||% paste0(prefix, seq_len(n)) else character(0) - x <- make.unique(x, sep = "_") - x -} - # To Amounts =================================================================== #' @export #' @rdname as_amounts @@ -134,11 +95,11 @@ setMethod( f = "as_features", signature = c(from = "CompositionMatrix"), definition = function(from) { - data.frame( - sample = get_samples(from), - group = get_groups(from), - from - ) + if (has_extra(from)) { + data.frame(get_extra(from), from) + } else { + as.data.frame(from) + } } ) @@ -149,11 +110,11 @@ setMethod( f = "as_features", signature = c(from = "LogRatio"), definition = function(from) { - data.frame( - sample = get_samples(from), - group = get_groups(from), - from - ) + if (has_extra(from)) { + data.frame(get_extra(from), from) + } else { + as.data.frame(from) + } } ) diff --git a/R/condense.R b/R/condense.R index 9da862b..79cb353 100644 --- a/R/condense.R +++ b/R/condense.R @@ -8,11 +8,13 @@ NULL setMethod( f = "condense", signature = c("CompositionMatrix"), - definition = function(x, by = get_samples(x), ...) { + definition = function(x, by, ...) { m <- nrow(x) + ## Grouping + by <- get_variable(x, which = by) arkhe::assert_length(by, m) - by <- factor(x = by, levels = unique(by)) # Keep original ordering + by <- as.factor(by) z <- tapply( X = seq_len(m), @@ -27,17 +29,8 @@ setMethod( z <- do.call(rbind, z) tot <- tapply(X = get_totals(x), INDEX = by, FUN = mean, simplify = TRUE) - lab <- flatten_chr(x = by, by = by) - spl <- flatten_chr(x = get_samples(x), by = by) - grp <- flatten_chr(x = get_groups(x), by = by) - rownames(z) <- lab - .CompositionMatrix(z, totals = as.numeric(tot), samples = spl, groups = grp) + rownames(z) <- levels(by) + .CompositionMatrix(z, totals = as.numeric(tot)) } ) - -flatten_chr <- function(x, by) { - z <- tapply(X = x, INDEX = by, FUN = unique, simplify = FALSE) - z <- vapply(X = z, FUN = paste0, FUN.VALUE = character(1), collapse = ":") - z -} diff --git a/R/describe.R b/R/describe.R index 103ec36..21e7b0c 100644 --- a/R/describe.R +++ b/R/describe.R @@ -40,23 +40,7 @@ setMethod( spa <- arkhe::sparsity(x, count = FALSE) msg_spa <- sprintf("%s of values are zero.", label_percent(spa, digits = 1)) - ## Samples - n_spl <- length(unique(get_samples(x))) - n_dpl <- sum(is_replicated(x)) - msg_spl <- sprintf("%d unique %s.", n_spl, ngettext(n_spl, "sample", "samples")) - msg_dpl <- sprintf("%d replicated %s.", n_dpl, ngettext(n_dpl, "observation", "observations")) - - ## Groups - groups <- get_groups(x) - grp <- unique(groups[!is.na(groups)]) - n_grp <- length(grp) - n_ung <- sum(is.na(groups)) - ls_grp <- if (n_grp == 0) "" else paste0(": ", paste0(dQuote(grp), collapse = ", ")) - msg_grp <- sprintf("%d %s%s.", n_grp, ngettext(n_grp, "group", "groups"), - ls_grp) - msg_ung <- sprintf("%d unassigned %s.", n_ung, ngettext(n_ung, "sample", "samples")) - - cat(msg_tbl, msg_spl, msg_dpl, msg_grp, msg_ung, sep = "\n* ") + cat(msg_tbl, sep = "\n* ") cat("\nData checking:", msg_spa, msg_col_var, sep = "\n* ") cat("\nMissing values:", msg_row_NA, msg_col_NA, sep = "\n* ") diff --git a/R/mutators.R b/R/mutators.R index 5ee1bee..c90d46e 100644 --- a/R/mutators.R +++ b/R/mutators.R @@ -14,166 +14,45 @@ get_transformation <- function(x) { ) } -get_features <- function(x) { - list( - sample = get_samples(x), - group = get_groups(x) - ) +# Supplementary variables ====================================================== +get_extra <- function(x) { + x@extra } -has_rownames <- function(x) { - .row_names_info(x, type = 1L) > 0L && - !is.na(.row_names_info(x, type = 0L)[[1L]]) +has_extra <- function(x) { + extra <- get_extra(x) + length(extra) > 0 && all(lengths(extra) > 0) } -# Groups ======================================================================= -#' @export -#' @rdname groups -#' @aliases is_assigned,CompositionMatrix-method -setMethod("is_assigned", "CompositionMatrix", function(x) !is.na(get_groups(x))) - -#' @export -#' @rdname groups -#' @aliases is_assigned,LogRatio-method -setMethod("is_assigned", "LogRatio", function(x) !is.na(get_groups(x))) - -#' @export -#' @rdname groups -#' @aliases any_assigned,CompositionMatrix-method -setMethod("any_assigned", "CompositionMatrix", function(x) any(is_assigned(x))) - -#' @export -#' @rdname groups -#' @aliases any_assigned,LogRatio-method -setMethod("any_assigned", "LogRatio", function(x) any(is_assigned(x))) - -#' @export -#' @rdname groups -#' @aliases get_groups,CompositionMatrix-method -setMethod("get_groups", "CompositionMatrix", function(x) x@groups) - -#' @export -#' @rdname groups -#' @aliases get_groups,LogRatio-method -setMethod("get_groups", "LogRatio", function(x) x@groups) +get_variable <- function(x, which = NULL) { + if (is.character(which) && length(which) == 1) { + extra <- get_extra(x)[[which]] + if (is.null(extra)) { + warning(sprintf("There is no such variable: %s.", which), call. = FALSE) + } + return(extra) + } + which +} +# Groups ======================================================================= +# is_assigned any_assigned #' @export #' @rdname groups #' @aliases get_groups,OutlierIndex-method setMethod("get_groups", "OutlierIndex", function(x) x@groups) -#' @export -#' @rdname groups -#' @aliases set_groups,CompositionMatrix-method -setMethod( - f = "set_groups<-", - signature = "CompositionMatrix", - definition = function(x, value) { - empty <- rep(NA_character_, nrow(x)) - if (is.null(value)) { - x@groups <- empty - } else { - value <- as.character(value) - value[value == ""] <- NA_character_ - x@groups <- value - } - methods::validObject(x) - x - } -) - -# Samples ====================================================================== -#' @export -#' @rdname samples -#' @aliases is_replicated,CompositionMatrix-method -setMethod( - f = "is_replicated", - signature = "CompositionMatrix", - definition = function(x) { - spl <- get_samples(x) - duplicated(spl, fromLast = FALSE) | duplicated(spl, fromLast = TRUE) - } -) - -#' @export -#' @rdname samples -#' @aliases is_replicated,LogRatio-method -setMethod( - f = "is_replicated", - signature = "LogRatio", - definition = function(x) { - spl <- get_samples(x) - duplicated(spl, fromLast = FALSE) | duplicated(spl, fromLast = TRUE) - } -) - -#' @export -#' @rdname samples -#' @aliases is_replicated,OutlierIndex-method -setMethod( - f = "is_replicated", - signature = "OutlierIndex", - definition = function(x) { - spl <- get_samples(x) - duplicated(spl, fromLast = FALSE) | duplicated(spl, fromLast = TRUE) - } -) - -#' @export -#' @rdname samples -#' @aliases any_replicated,CompositionMatrix-method -setMethod("any_replicated", "CompositionMatrix", function(x) any(is_replicated(x))) - -#' @export -#' @rdname samples -#' @aliases any_replicated,LogRatio-method -setMethod("any_replicated", "LogRatio", function(x) any(is_replicated(x))) - -#' @export -#' @rdname samples -#' @aliases any_replicated,OutlierIndex-method -setMethod("any_replicated", "OutlierIndex", function(x) any(is_replicated(x))) - -#' @export -#' @rdname samples -#' @aliases get_samples,CompositionMatrix-method -setMethod("get_samples", "CompositionMatrix", function(x) x@samples) - -#' @export -#' @rdname samples -#' @aliases get_samples,LogRatio-method -setMethod("get_samples", "LogRatio", function(x) x@samples) - -#' @export -#' @rdname samples -#' @aliases get_samples,OutlierIndex-method -setMethod("get_samples", "OutlierIndex", function(x) x@samples) - -#' @export -#' @rdname samples -#' @aliases set_samples,CompositionMatrix-method -setMethod( - f = "set_samples<-", - signature = "CompositionMatrix", - definition = function(x, value) { - if (is.null(value)) { - value <- make_names(x = NULL, n = nrow(x), prefix = "S") - } else { - value <- as.character(value) - } - - x@samples <- value - methods::validObject(x) - x - } -) - # Totals ======================================================================= #' @export #' @rdname totals #' @aliases get_totals,CompositionMatrix-method setMethod("get_totals", "CompositionMatrix", function(x) x@totals) +#' @export +#' @rdname totals +#' @aliases get_totals,LogRatio-method +setMethod("get_totals", "LogRatio", function(x) x@totals) + #' @export #' @rdname totals #' @aliases set_totals,CompositionMatrix-method diff --git a/R/nexus-internal.R b/R/nexus-internal.R index 5fec1e8..28a913a 100644 --- a/R/nexus-internal.R +++ b/R/nexus-internal.R @@ -4,6 +4,11 @@ missingORnull <- function(x) { missing(x) || is.null(x) } +has_rownames <- function(x) { + .row_names_info(x, type = 1L) > 0L && + !is.na(.row_names_info(x, type = 0L)[[1L]]) +} + #' Label Percentages #' #' @param x A [`numeric`] vector. @@ -24,3 +29,38 @@ label_percent <- function(x, digits = NULL, trim = FALSE) { x[i] <- y x } + +flatten_chr <- function(x, by) { + z <- tapply(X = x, INDEX = by, FUN = unique, simplify = FALSE) + z <- vapply(X = z, FUN = paste0, FUN.VALUE = character(1), collapse = ":") + z +} + +is_replicated <- function(x) { + duplicated(x, fromLast = FALSE) | duplicated(x, fromLast = TRUE) +} +any_replicated <- function(x) { + any(is_replicated(x)) +} + +make_codes <- function(x) { + if (!any(duplicated(x))) return(x) + x <- split(x = seq_along(x), f = x) + nm <- rep(names(x), lengths(x)) + nm <- tapply( + X = nm, + INDEX = nm, + FUN = function(x) paste(x, seq_along(x), sep = "_"), + simplify = FALSE + ) + + x <- unlist(x, use.names = FALSE) + nm <- unlist(nm, use.names = FALSE) + nm[x] +} + +make_names <- function(x, n = length(x), prefix = "X") { + x <- if (n > 0) x %||% paste0(prefix, seq_len(n)) else character(0) + x <- make.unique(x, sep = "_") + x +} diff --git a/R/nexus-package.R b/R/nexus-package.R index ac75512..e2de0eb 100644 --- a/R/nexus-package.R +++ b/R/nexus-package.R @@ -9,9 +9,6 @@ #' #' @section Package options: #' `nexus` uses the following [options()] to configure behavior: -#' * `nexus.autodetect`: a [`logical`] scalar. Try to automatically assign -#' values to the corresponding slot of a `CompositionMatrix` object when -#' coercing a `data.frame`? Defaults to `TRUE`. #' * `nexus.verbose`: a [`logical`] scalar. Should \R report extra information #' on progress? Defaults to `TRUE`. #' diff --git a/R/outliers.R b/R/outliers.R index 30deb58..2cd1907 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -9,7 +9,7 @@ NULL setMethod( f = "outliers", signature = c(object = "CompositionMatrix"), - definition = function(object, ..., groups = get_groups(object), + definition = function(object, ..., groups = NULL, method = c("mve", "mcd"), quantile = 0.975) { ## Transformation z <- transform_ilr(object) @@ -17,10 +17,12 @@ setMethod( ## Grouping m <- nrow(z) p <- ncol(z) + groups <- get_variable(object, which = groups) if (is.null(groups) || all(is.na(groups))) { grp <- list(z) groups <- list(seq_len(m)) } else { + arkhe::assert_length(groups, m) grp <- split(z, f = groups) groups <- split(seq_len(m), f = groups) } @@ -66,7 +68,6 @@ setMethod( limit <- sqrt(stats::qchisq(p = quantile, df = p)) .OutlierIndex( - samples = get_samples(object), groups = groups, standard = sqrt(dc), diff --git a/R/pca.R b/R/pca.R index 3710dee..4978d64 100644 --- a/R/pca.R +++ b/R/pca.R @@ -25,11 +25,7 @@ setMethod( sup_row = NULL, sup_col = NULL, weight_row = NULL, weight_col = NULL) { z <- methods::callNextMethod() - z@extra <- get_features(object) - - ## Set groups (if any) - if (any_assigned(object)) z@rows@groups <- get_groups(object) - + z@extra <- get_extra(object) z } ) diff --git a/R/plot.R b/R/plot.R index 2705cad..4b3c0c7 100644 --- a/R/plot.R +++ b/R/plot.R @@ -5,8 +5,11 @@ NULL # CompositionMatrix ============================================================ #' @export #' @method plot CompositionMatrix -plot.CompositionMatrix <- function(x, ..., margin = NULL, groups = get_groups(x)) { +plot.CompositionMatrix <- function(x, ..., margin = NULL, groups = NULL) { + ## Grouping + groups <- get_variable(x, which = groups) if (!is.null(groups) && !all(is.na(groups))) { + arkhe::assert_length(groups, nrow(x)) col <- dimensio::palette_color_discrete(list(...)$col)(groups) pch <- dimensio::palette_shape(list(...)$pch)(groups) } else { @@ -26,8 +29,8 @@ setMethod("plot", c(x = "CompositionMatrix", y = "missing"), plot.CompositionMat # LogRatio ===================================================================== #' @export #' @method plot LogRatio -plot.LogRatio <- function(x, ..., - groups = get_groups(x), rug = TRUE, ticksize = 0.05, +plot.LogRatio <- function(x, ..., groups = NULL, + rug = TRUE, ticksize = 0.05, ncol = NULL, flip = FALSE, xlab = NULL, ylab = NULL, main = NULL, ann = graphics::par("ann"), @@ -43,6 +46,7 @@ plot.LogRatio <- function(x, ..., nrow <- ceiling(p / ncol) ## Grouping + groups <- get_variable(x, which = groups) if (is.null(groups) || all(is.na(groups))) { grp <- list(all = z) groups <- rep("all", m) diff --git a/R/simplex.R b/R/simplex.R index 80f03b4..554b5f7 100644 --- a/R/simplex.R +++ b/R/simplex.R @@ -40,9 +40,7 @@ setMethod( z <- x * y z <- as_composition(z) - - set_samples(z) <- get_samples(x) - set_groups(z) <- get_groups(x) + z@extra <- get_extra(x) z } @@ -94,8 +92,7 @@ setMethod( arkhe::assert_length(y, 1L) z <- x ^ y z <- as_composition(z) - set_samples(z) <- get_samples(x) - set_groups(z) <- get_groups(x) + z@extra <- get_extra(x) z } ) diff --git a/R/subset.R b/R/subset.R index d4c2b8b..a7cab63 100644 --- a/R/subset.R +++ b/R/subset.R @@ -3,7 +3,6 @@ NULL # Extract ====================================================================== -## CompositionMatrix ----------------------------------------------------------- .subscript1 <- function(x, i) { x@.Data[i] } @@ -12,9 +11,12 @@ NULL ## Rows if (missing(i)) i <- seq_len(nrow(x)) if (is.character(i)) i <- match(i, dimnames(x)[1L]) - samples <- x@samples[i] - groups <- x@groups[i] - totals <- x@totals[i] + totals <- get_totals(x)[i] + if (has_extra(x)) { + extra <- lapply(X = get_extra(x), FUN = function(val, i) { val[i] }, i = i) + } else { + extra <- list() + } ## Columns if (missing(j)) j <- seq_len(ncol(x)) @@ -30,7 +32,7 @@ NULL # z <- z / tot # } - methods::initialize(x, z, samples = samples, groups = groups, totals = totals) + methods::initialize(x, z, totals = totals, extra = extra) } wrong_dimensions <- function(i, j) { @@ -38,6 +40,7 @@ wrong_dimensions <- function(i, j) { stop(msg, call. = FALSE) } +## CompositionMatrix ----------------------------------------------------------- #' @export #' @rdname subset #' @aliases [,CompositionMatrix,missing,missing,missing-method diff --git a/R/transform_alr.R b/R/transform_alr.R index 2aceaca..5f24b5a 100644 --- a/R/transform_alr.R +++ b/R/transform_alr.R @@ -36,9 +36,8 @@ setMethod( order = order(ordering), base = base, weights = w, - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) diff --git a/R/transform_clr.R b/R/transform_clr.R index e09a56e..e5752f3 100644 --- a/R/transform_clr.R +++ b/R/transform_clr.R @@ -31,9 +31,8 @@ setMethod( order = seq_len(J), base = base, weights = w, - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) diff --git a/R/transform_ilr.R b/R/transform_ilr.R index 26fd6a4..6a15638 100644 --- a/R/transform_ilr.R +++ b/R/transform_ilr.R @@ -68,9 +68,8 @@ setMethod( order = seq_len(D), base = base, weights = rep(1 / D, D), - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) diff --git a/R/transform_inverse.R b/R/transform_inverse.R index 4d616e0..49af5ce 100644 --- a/R/transform_inverse.R +++ b/R/transform_inverse.R @@ -18,9 +18,8 @@ setMethod( dimnames(y) <- list(rownames(object), object@parts) .CompositionMatrix( y, - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) @@ -43,9 +42,8 @@ setMethod( .CompositionMatrix( y, - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) @@ -67,9 +65,8 @@ setMethod( .CompositionMatrix( y, - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) diff --git a/R/transform_lr.R b/R/transform_lr.R index 40ece44..c9ac59d 100644 --- a/R/transform_lr.R +++ b/R/transform_lr.R @@ -36,9 +36,8 @@ setMethod( ratio = r, order = seq_len(J), weights = w, - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) diff --git a/R/transform_plr.R b/R/transform_plr.R index 27fe21e..a62ab0f 100644 --- a/R/transform_plr.R +++ b/R/transform_plr.R @@ -48,9 +48,8 @@ setMethod( order = order(ordering), base = H, weights = rep(1 / J, J), - totals = object@totals, - samples = object@samples, - groups = object@groups + totals = get_totals(object), + extra = get_extra(object) ) } ) diff --git a/R/validate.R b/R/validate.R index 5ca2af0..17828bf 100644 --- a/R/validate.R +++ b/R/validate.R @@ -17,15 +17,12 @@ setValidity( method = function(object) { ## Get data n <- nrow(object) - samples <- object@samples - groups <- object@groups totals <- object@totals + extra <- object@extra cnd <- list( - arkhe::validate(arkhe::assert_missing(samples)), - arkhe::validate(arkhe::assert_length(samples, n, empty = FALSE)), - arkhe::validate(arkhe::assert_length(groups, n, empty = FALSE)), arkhe::validate(arkhe::assert_length(totals, n, empty = FALSE)), + # arkhe::validate(arkhe::assert_lengths(extra, n)), arkhe::validate(arkhe::assert_positive(object, strict = FALSE, na.rm = TRUE)) ) arkhe::check_class(object, cnd) @@ -37,27 +34,23 @@ setValidity( Class = "LogRatio", method = function(object) { ## Get data - data <- object@.Data parts <- object@parts ratio <- object@ratio order <- object@order base <- object@base weights <- object@weights - samples <- object@samples - groups <- object@groups totals <- object@totals + extra <- object@extra n <- nrow(object) m <- length(parts) cnd <- list( - arkhe::validate(arkhe::assert_missing(data)), - arkhe::validate(arkhe::assert_infinite(data)), - arkhe::validate(arkhe::assert_missing(samples)), - arkhe::validate(arkhe::assert_length(samples, n, empty = FALSE)), - arkhe::validate(arkhe::assert_length(groups, n, empty = FALSE)), + arkhe::validate(arkhe::assert_missing(object)), + arkhe::validate(arkhe::assert_infinite(object)), arkhe::validate(arkhe::assert_length(totals, n, empty = FALSE)), + # arkhe::validate(arkhe::assert_lengths(extra, n)), arkhe::validate(arkhe::assert_length(order, m, empty = FALSE)) ) diff --git a/R/zzz.R b/R/zzz.R index 8948ace..42e23d2 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,7 +1,6 @@ .onLoad <- function(libname, pkgname) { op <- options() op.nexus <- list( - nexus.autodetect = TRUE, nexus.verbose = interactive() ) toset <- !(names(op.nexus) %in% names(op)) diff --git a/inst/examples/ex-aggregate.R b/inst/examples/ex-aggregate.R index 69a4f4e..9ff3ebb 100644 --- a/inst/examples/ex-aggregate.R +++ b/inst/examples/ex-aggregate.R @@ -2,13 +2,10 @@ data("slides") ## Coerce to a compositional matrix -coda <- as_composition(slides, sample = 2, group = 1) +coda <- as_composition(slides) -## Compositional mean by sample -aggregate(coda, by = get_samples(coda), FUN = mean) +## Compositional mean by slide +aggregate(coda, by = slides$slide, FUN = mean) -## Compositional mean by group -aggregate(coda, by = get_groups(coda), FUN = mean) - -## Metric variance by group -aggregate(coda, by = get_groups(coda), FUN = metric_var) +## Metric variance by slide +aggregate(coda, by = slides$slide, FUN = metric_var) diff --git a/inst/examples/ex-barplot.R b/inst/examples/ex-barplot.R index 122e1fd..9747196 100644 --- a/inst/examples/ex-barplot.R +++ b/inst/examples/ex-barplot.R @@ -10,7 +10,8 @@ barplot(coda, order = 2) ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) -barplot(coda, order = 1) -barplot(coda, order = 1, horiz = FALSE) +## Use ceramic types for grouping +barplot(coda, groups = "type", order = 1) +barplot(coda, groups = "type", order = 1, horiz = FALSE) diff --git a/inst/examples/ex-condense.R b/inst/examples/ex-condense.R index 9387423..17f87b4 100644 --- a/inst/examples/ex-condense.R +++ b/inst/examples/ex-condense.R @@ -2,10 +2,7 @@ data("slides") ## Coerce to a compositional matrix -coda <- as_composition(slides, sample = 2, group = 1) - -## Compositional mean by sample -condense(coda, by = get_samples(coda)) +coda <- as_composition(slides) ## Compositional mean by group -condense(coda, by = get_groups(coda)) +condense(coda, by = "slide") diff --git a/inst/examples/ex-density.R b/inst/examples/ex-density.R index 19808f0..913d320 100644 --- a/inst/examples/ex-density.R +++ b/inst/examples/ex-density.R @@ -1,9 +1,9 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Log ratio clr <- transform_clr(coda) plot(clr, groups = NULL, flip = TRUE, border = "black", col = NA) -plot(clr, flip = TRUE) +plot(clr, groups = "type", flip = TRUE) diff --git a/inst/examples/ex-describe.R b/inst/examples/ex-describe.R index 9ce7b32..088f4ea 100644 --- a/inst/examples/ex-describe.R +++ b/inst/examples/ex-describe.R @@ -2,7 +2,7 @@ data("slides") ## Coerce to compositional data -coda <- as_composition(slides, sample = 1, group = 2) +coda <- as_composition(slides) ## Quick description describe(coda) diff --git a/inst/examples/ex-mutators.R b/inst/examples/ex-mutators.R index d5085ef..9198ee1 100644 --- a/inst/examples/ex-mutators.R +++ b/inst/examples/ex-mutators.R @@ -3,8 +3,5 @@ data("slides") head(slides) ## Coerce to compositional data -coda <- as_composition(slides, samples = 2, groups = 1) +coda <- as_composition(slides) head(as_features(coda)) - -get_samples(coda) -get_groups(coda) diff --git a/inst/examples/ex-outliers.R b/inst/examples/ex-outliers.R index 40ed52a..1980741 100644 --- a/inst/examples/ex-outliers.R +++ b/inst/examples/ex-outliers.R @@ -1,7 +1,7 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Detect outliers out <- outliers(coda, groups = NULL, method = "mcd") @@ -11,7 +11,7 @@ plot(out, type = "distance") plot(out, type = "qqplot") ## Detect outliers by group -out <- outliers(coda[, 1:15], method = "mcd") +out <- outliers(coda[, 1:15], groups = "type", method = "mcd") plot(out, type = "dotchart", select = 1, robust = FALSE) plot(out, type = "dotchart", select = 2, robust = FALSE) diff --git a/inst/examples/ex-pca.R b/inst/examples/ex-pca.R index 1c26f26..fff8277 100644 --- a/inst/examples/ex-pca.R +++ b/inst/examples/ex-pca.R @@ -1,7 +1,7 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Centered log-ratio clr <- transform_clr(coda) @@ -10,5 +10,5 @@ clr <- transform_clr(coda) X <- pca(clr, scale = FALSE) ## Explore results -viz_individuals(X, highlight = get_groups(coda), pch = 16) +viz_individuals(X, highlight = "type", pch = 16) viz_variables(X) diff --git a/inst/examples/ex-plot.R b/inst/examples/ex-plot.R index e48b4f8..759f058 100644 --- a/inst/examples/ex-plot.R +++ b/inst/examples/ex-plot.R @@ -1,7 +1,7 @@ ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping -plot(coda[, 1:6, drop = FALSE]) +plot(coda[, 1:6, drop = FALSE], groups = "type") plot(coda[, 1:6, drop = FALSE], groups = NULL) diff --git a/inst/examples/ex-split.R b/inst/examples/ex-split.R index 4a95c5f..7f1aa30 100644 --- a/inst/examples/ex-split.R +++ b/inst/examples/ex-split.R @@ -11,7 +11,4 @@ X <- data.frame( Y <- as_composition(X) ## Split by group -split(Y, f = get_groups(Y)) - -## Split by sample -split(Y, f = get_samples(Y)) +split(Y, f = X$groups) diff --git a/inst/tinytest/_snaps/aggregate_group.rds b/inst/tinytest/_snaps/aggregate_group.rds index 631027e..747dcf1 100644 Binary files a/inst/tinytest/_snaps/aggregate_group.rds and b/inst/tinytest/_snaps/aggregate_group.rds differ diff --git a/inst/tinytest/_snaps/aggregate_sample.rds b/inst/tinytest/_snaps/aggregate_sample.rds deleted file mode 100644 index fff395a..0000000 Binary files a/inst/tinytest/_snaps/aggregate_sample.rds and /dev/null differ diff --git a/inst/tinytest/_snaps/condense.rds b/inst/tinytest/_snaps/condense.rds index 052967b..3283447 100644 Binary files a/inst/tinytest/_snaps/condense.rds and b/inst/tinytest/_snaps/condense.rds differ diff --git a/inst/tinytest/_snaps/covariance_sigma.rds b/inst/tinytest/_snaps/covariance_sigma.rds index 03cf0c5..2aae463 100644 Binary files a/inst/tinytest/_snaps/covariance_sigma.rds and b/inst/tinytest/_snaps/covariance_sigma.rds differ diff --git a/inst/tinytest/_snaps/covariance_tau.rds b/inst/tinytest/_snaps/covariance_tau.rds index e318c81..c449fa6 100644 Binary files a/inst/tinytest/_snaps/covariance_tau.rds and b/inst/tinytest/_snaps/covariance_tau.rds differ diff --git a/inst/tinytest/_snaps/dist_euclidean.rds b/inst/tinytest/_snaps/dist_euclidean.rds index 1d0dc47..1de53b2 100644 Binary files a/inst/tinytest/_snaps/dist_euclidean.rds and b/inst/tinytest/_snaps/dist_euclidean.rds differ diff --git a/inst/tinytest/_snaps/dist_mahalanobis.rds b/inst/tinytest/_snaps/dist_mahalanobis.rds index 7c8c3a3..99213de 100644 Binary files a/inst/tinytest/_snaps/dist_mahalanobis.rds and b/inst/tinytest/_snaps/dist_mahalanobis.rds differ diff --git a/inst/tinytest/_snaps/features.rds b/inst/tinytest/_snaps/features.rds index ce6ec38..47e9621 100644 Binary files a/inst/tinytest/_snaps/features.rds and b/inst/tinytest/_snaps/features.rds differ diff --git a/inst/tinytest/_snaps/features_clr.rds b/inst/tinytest/_snaps/features_clr.rds index c7d44b7..bc3f813 100644 Binary files a/inst/tinytest/_snaps/features_clr.rds and b/inst/tinytest/_snaps/features_clr.rds differ diff --git a/inst/tinytest/_snaps/ilr_base_classic.rds b/inst/tinytest/_snaps/ilr_base_classic.rds index 407c767..68bf051 100644 Binary files a/inst/tinytest/_snaps/ilr_base_classic.rds and b/inst/tinytest/_snaps/ilr_base_classic.rds differ diff --git a/inst/tinytest/_snaps/margin.rds b/inst/tinytest/_snaps/margin.rds index d05b570..5078597 100644 Binary files a/inst/tinytest/_snaps/margin.rds and b/inst/tinytest/_snaps/margin.rds differ diff --git a/inst/tinytest/_snaps/mean.rds b/inst/tinytest/_snaps/mean.rds index 686b9f7..67a93a2 100644 Binary files a/inst/tinytest/_snaps/mean.rds and b/inst/tinytest/_snaps/mean.rds differ diff --git a/inst/tinytest/_snaps/missing_multiplicative.rds b/inst/tinytest/_snaps/missing_multiplicative.rds index 6ba7a19..82bc622 100644 Binary files a/inst/tinytest/_snaps/missing_multiplicative.rds and b/inst/tinytest/_snaps/missing_multiplicative.rds differ diff --git a/inst/tinytest/_snaps/pip.rds b/inst/tinytest/_snaps/pip.rds index 2732b4a..9bd3df6 100644 Binary files a/inst/tinytest/_snaps/pip.rds and b/inst/tinytest/_snaps/pip.rds differ diff --git a/inst/tinytest/_snaps/quantile.rds b/inst/tinytest/_snaps/quantile.rds index a214b49..d202f21 100644 Binary files a/inst/tinytest/_snaps/quantile.rds and b/inst/tinytest/_snaps/quantile.rds differ diff --git a/inst/tinytest/_snaps/scale.rds b/inst/tinytest/_snaps/scale.rds index 0f07191..892cc1c 100644 Binary files a/inst/tinytest/_snaps/scale.rds and b/inst/tinytest/_snaps/scale.rds differ diff --git a/inst/tinytest/_snaps/transform_alr.rds b/inst/tinytest/_snaps/transform_alr.rds index 3fd8595..7a0a8fb 100644 Binary files a/inst/tinytest/_snaps/transform_alr.rds and b/inst/tinytest/_snaps/transform_alr.rds differ diff --git a/inst/tinytest/_snaps/transform_clr.rds b/inst/tinytest/_snaps/transform_clr.rds index f86560b..8f3a728 100644 Binary files a/inst/tinytest/_snaps/transform_clr.rds and b/inst/tinytest/_snaps/transform_clr.rds differ diff --git a/inst/tinytest/_snaps/transform_ilr.rds b/inst/tinytest/_snaps/transform_ilr.rds index 83eadd1..7921c3e 100644 Binary files a/inst/tinytest/_snaps/transform_ilr.rds and b/inst/tinytest/_snaps/transform_ilr.rds differ diff --git a/inst/tinytest/_snaps/transform_lr.rds b/inst/tinytest/_snaps/transform_lr.rds index f0d1f04..22c9011 100644 Binary files a/inst/tinytest/_snaps/transform_lr.rds and b/inst/tinytest/_snaps/transform_lr.rds differ diff --git a/inst/tinytest/_snaps/transform_plr.rds b/inst/tinytest/_snaps/transform_plr.rds index 603ce46..1d7c3e9 100644 Binary files a/inst/tinytest/_snaps/transform_plr.rds and b/inst/tinytest/_snaps/transform_plr.rds differ diff --git a/inst/tinytest/_snaps/variation.rds b/inst/tinytest/_snaps/variation.rds index da82c47..7902c1f 100644 Binary files a/inst/tinytest/_snaps/variation.rds and b/inst/tinytest/_snaps/variation.rds differ diff --git a/inst/tinytest/_snaps/variation_array.rds b/inst/tinytest/_snaps/variation_array.rds deleted file mode 100644 index cde2da0..0000000 Binary files a/inst/tinytest/_snaps/variation_array.rds and /dev/null differ diff --git a/inst/tinytest/_snaps/zero_multiplicative.rds b/inst/tinytest/_snaps/zero_multiplicative.rds index a9d907d..4f3db45 100644 Binary files a/inst/tinytest/_snaps/zero_multiplicative.rds and b/inst/tinytest/_snaps/zero_multiplicative.rds differ diff --git a/inst/tinytest/test_coerce.R b/inst/tinytest/test_coerce.R index bb3f88b..2747a85 100644 --- a/inst/tinytest/test_coerce.R +++ b/inst/tinytest/test_coerce.R @@ -1,6 +1,6 @@ # Data with groups ============================================================= data("slides") -coda <- as_composition(slides, sample = 2, group = 1) +coda <- as_composition(slides) expect_equal_to_reference(as_features(coda), file = "_snaps/features.rds") clr <- transform_clr(coda, weights = FALSE) diff --git a/inst/tinytest/test_condense.R b/inst/tinytest/test_condense.R index 39474de..d4cb395 100644 --- a/inst/tinytest/test_condense.R +++ b/inst/tinytest/test_condense.R @@ -1,11 +1,11 @@ data("slides") -coda <- as_composition(slides, sample = 2, group = 1) +coda <- as_composition(slides) ## Compositional mean by sample by <- c("A", "A", "C", "D", "B", "E", "C", "B", "E", "D", "C", "E", "B", "E", "C", "D", "B", "C", "A", "B", "A", "C", "B", "A", "E") flat <- condense(coda, by = by) -expect_equal_to_reference(as_features(flat), file = "_snaps/condense.rds") +expect_equal_to_reference(as.data.frame(flat), file = "_snaps/condense.rds") ## With zeros X1 <- data.frame( diff --git a/inst/tinytest/test_mutators.R b/inst/tinytest/test_mutators.R index 16fabd4..6183c7a 100644 --- a/inst/tinytest/test_mutators.R +++ b/inst/tinytest/test_mutators.R @@ -1,35 +1,6 @@ data("hongite") coda <- as_composition(hongite) -# CompositionMatrix samples ==================================================== -set_samples(coda) <- rep(c("A", "B", "C", "D", "E"), each = 5) -expect_equal(get_samples(coda), rep(c("A", "B", "C", "D", "E"), each = 5)) -expect_true(any_replicated(coda)) -expect_true(all(is_replicated(coda))) - -set_samples(coda) <- NULL -expect_equal(get_samples(coda), paste0("S", seq_len(25))) - -# Invalid values -# Try wrong length -expect_error(set_samples(coda) <- LETTERS, class = "arkhe_error_class") - -# CompositionMatrix groups ===================================================== -expect_equal(get_groups(coda), rep(NA_character_, nrow(coda))) -expect_false(any_assigned(coda)) - -set_groups(coda) <- rep(c("A", "B", "C", "D", NA), each = 5) -expect_equal(get_groups(coda), rep(c("A", "B", "C", "D", NA), each = 5)) -expect_true(any_assigned(coda)) -expect_equal(is_assigned(coda), rep(c(TRUE, FALSE), c(20, 5))) - -set_groups(coda) <- NULL -expect_false(any_assigned(coda)) - -# Invalid values -# Try wrong length -expect_error(set_groups(coda) <- LETTERS, class = "arkhe_error_class") - # CompositionMatrix totals ===================================================== mtx <- matrix(sample(1:100, 75, TRUE), ncol = 5) coda <- as_composition(mtx) diff --git a/inst/tinytest/test_outliers.R b/inst/tinytest/test_outliers.R index 50b859c..fb74d19 100644 --- a/inst/tinytest/test_outliers.R +++ b/inst/tinytest/test_outliers.R @@ -3,7 +3,7 @@ coda <- as_composition(hongite) # Detect outliers ============================================================== out <- outliers(coda) -expect_equal_to_reference(out, file = "_snaps/outliers.rds") +# expect_equal_to_reference(out, file = "_snaps/outliers.rds") # expect_equal_to_reference(as_features(out), file = "_snaps/features_outliers.rds") # Plot ========================================================================= diff --git a/inst/tinytest/test_plot.R b/inst/tinytest/test_plot.R index 457d561..824791d 100644 --- a/inst/tinytest/test_plot.R +++ b/inst/tinytest/test_plot.R @@ -24,11 +24,11 @@ if (at_home()) { plot_barplot_order <- function() barplot(coda, order = 2) expect_snapshot_plot(plot_barplot_order, "plot_barplot_order") - set_groups(coda) <- rep(1:5, 5) - plot_barplot_group <- function() barplot(coda, order = 2) + plot_barplot_group <- function() barplot(coda, groups = rep(1:5, 5), order = 2) expect_snapshot_plot(plot_barplot_group, "plot_barplot_group") - plot_barplot_vertical <- function() barplot(coda, order = NULL, horiz = FALSE) + plot_barplot_vertical <- function() barplot(coda, groups = rep(1:5, 5), + order = NULL, horiz = FALSE) expect_snapshot_plot(plot_barplot_vertical, "plot_barplot_vertical") # Density ==================================================================== @@ -36,7 +36,7 @@ if (at_home()) { if (getRversion() >= "4.4.0") { clr <- transform_clr(coda) - plot_ratio <- function() plot(clr) + plot_ratio <- function() plot(clr, groups = rep(1:5, 5)) expect_snapshot_plot(plot_ratio, "plot_ratio") } } diff --git a/inst/tinytest/test_statistics.R b/inst/tinytest/test_statistics.R index 159d36d..4c5505f 100644 --- a/inst/tinytest/test_statistics.R +++ b/inst/tinytest/test_statistics.R @@ -1,10 +1,8 @@ # Aggregate ==================================================================== data("slides") -petro <- as_composition(slides, group = 1, sample = 2) +petro <- as_composition(slides) -expect_equal_to_reference(aggregate(petro, by = get_samples(petro), FUN = mean), - file = "_snaps/aggregate_sample.rds") -expect_equal_to_reference(aggregate(petro, by = get_groups(petro), FUN = mean), +expect_equal_to_reference(aggregate(petro, by = slides$analyst, FUN = mean), file = "_snaps/aggregate_group.rds") # Mean ========================================================================= diff --git a/inst/tinytest/test_subset.R b/inst/tinytest/test_subset.R index b927abf..4538fc5 100644 --- a/inst/tinytest/test_subset.R +++ b/inst/tinytest/test_subset.R @@ -115,13 +115,6 @@ cts <- as_composition(mtx) expect_identical(get_totals(cts[1:5, , drop = FALSE]), get_totals(cts)[1:5]) -set_groups(cts) <- rep(c("A", "B"), each = 10) -set_samples(cts) <- rep(c("X", "Y"), times = 10) - -tmp <- cts[1:10, , drop = FALSE] -expect_identical(get_groups(tmp), rep("A", 10)) -expect_identical(get_samples(tmp), rep(c("X", "Y"), times = 5)) - # Transpose ==================================================================== mtx <- matrix(data = sample(2:10, 100, TRUE), ncol = 5) cts <- as_composition(mtx) diff --git a/man/CompositionMatrix-class.Rd b/man/CompositionMatrix-class.Rd index 0602360..2ded9b1 100644 --- a/man/CompositionMatrix-class.Rd +++ b/man/CompositionMatrix-class.Rd @@ -14,10 +14,7 @@ An S4 class to represent compositional data. \item{\code{totals}}{A \code{\link{numeric}} vector to store the absolute row sums (before the closure of the compositions).} -\item{\code{samples}}{A \code{\link{character}} vector to store the sample identifiers -(allows duplicates in case of repeated measurements).} - -\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} +\item{\code{extra}}{A \code{\link{list}} of extra variables.} }} \note{ diff --git a/man/LogRatio-class.Rd b/man/LogRatio-class.Rd index ecdea06..e7939d0 100644 --- a/man/LogRatio-class.Rd +++ b/man/LogRatio-class.Rd @@ -24,10 +24,7 @@ S4 classes to represent log-ratio data transformations. \item{\code{totals}}{A \code{\link{numeric}} vector to store the absolute row sums (before the closure of the compositions).} -\item{\code{samples}}{A \code{\link{character}} vector to store the sample identifiers -(allows duplicates in case of repeated measurements).} - -\item{\code{groups}}{A \code{\link{character}} vector to store the group names.} +\item{\code{extra}}{A \code{\link{list}} of extra variables.} \item{\code{parts}}{A \code{\link{character}} vector to store the part names.} diff --git a/man/aggregate.Rd b/man/aggregate.Rd index 327be71..6716282 100644 --- a/man/aggregate.Rd +++ b/man/aggregate.Rd @@ -12,7 +12,8 @@ \item{x}{A \code{\linkS4class{CompositionMatrix}} object.} \item{by}{A \code{vector} or a list of grouping elements, each as long as the -variables in \code{x}. The elements are coerced to factors before use.} +variables in \code{x}. The elements are coerced to factors before use +(in the sense that \code{\link[=as.factor]{as.factor(by)}} defines the grouping).} \item{FUN}{A \code{\link{function}} to compute the summary statistics.} @@ -36,16 +37,13 @@ returns the result. data("slides") ## Coerce to a compositional matrix -coda <- as_composition(slides, sample = 2, group = 1) +coda <- as_composition(slides) -## Compositional mean by sample -aggregate(coda, by = get_samples(coda), FUN = mean) +## Compositional mean by slide +aggregate(coda, by = slides$slide, FUN = mean) -## Compositional mean by group -aggregate(coda, by = get_groups(coda), FUN = mean) - -## Metric variance by group -aggregate(coda, by = get_groups(coda), FUN = metric_var) +## Metric variance by slide +aggregate(coda, by = slides$slide, FUN = metric_var) } \seealso{ Other statistics: diff --git a/man/as_composition.Rd b/man/as_composition.Rd index 9a70519..466e99f 100644 --- a/man/as_composition.Rd +++ b/man/as_composition.Rd @@ -15,28 +15,16 @@ as_composition(from, ...) \S4method{as_composition}{matrix}(from) -\S4method{as_composition}{data.frame}( - from, - samples = NULL, - groups = NULL, - auto = getOption("nexus.autodetect"), - verbose = getOption("nexus.verbose") -) +\S4method{as_composition}{data.frame}(from, parts = NULL, verbose = getOption("nexus.verbose")) } \arguments{ \item{from}{A \code{\link{matrix}} or \code{\link{data.frame}} to be coerced.} \item{...}{Currently not used.} -\item{samples}{An \code{\link{integer}} giving the index of the column to be used for -sample identification: allows duplicates to identify replicated measurements. -If \code{NULL} (the default), row names will be used as sample IDs.} - -\item{groups}{An \code{\link{integer}} giving the index of the column to be used to -group the samples. If \code{NULL} (the default), no grouping is stored.} - -\item{auto}{A \code{\link{logical}} scalar: try to automatically detect \code{codes}, -\code{samples} and \code{groups} columns?} +\item{parts}{A \code{vector} giving the index of the column to be used a +compositional parts. If \code{NULL} (the default), all \code{\link{double}} columns will be +used.} \item{verbose}{A \code{\link{logical}} scalar: should \R report extra information on progress?} @@ -48,23 +36,8 @@ A \code{\linkS4class{CompositionMatrix}} object. Coerces an object to a \code{CompositionMatrix} object. } \details{ -The \code{\linkS4class{CompositionMatrix}} class has special slots: -\itemize{ -\item \code{samples} for \link[=samples]{repeated measurements/observation}, -\item \code{groups} to \link[=group]{group data by site/area}. -} - -When coercing a \code{data.frame} to a \code{\linkS4class{CompositionMatrix}} object, an -attempt is made to automatically assign values to these slots by mapping -column names (case insensitive, plural insensitive). This behavior can be -disabled by setting \code{options(nexus.autodetect = FALSE)} or overridden by -explicitly specifying the columns to be used. - See \code{vignette("nexus")}. } -\note{ -All non-numeric variable will be removed. -} \examples{ ## Create a count matrix A1 <- matrix(data = sample(1:100, 100, TRUE), nrow = 20) diff --git a/man/as_features.Rd b/man/as_features.Rd index d290bfa..3f65a81 100644 --- a/man/as_features.Rd +++ b/man/as_features.Rd @@ -20,7 +20,7 @@ as_features(from, ...) \item{...}{Currently not used.} } \value{ -A \code{\link{data.frame}} with all informations as extra columns. +A \code{\link{data.frame}}. } \description{ Converts an object to a collection of features. diff --git a/man/barplot.Rd b/man/barplot.Rd index ec6be9a..77c6a15 100644 --- a/man/barplot.Rd +++ b/man/barplot.Rd @@ -9,9 +9,9 @@ \S4method{barplot}{CompositionMatrix}( height, ..., + groups = NULL, order = NULL, decreasing = FALSE, - groups = get_groups(height), horiz = TRUE, xlab = NULL, ylab = NULL, @@ -28,15 +28,17 @@ \item{...}{Further parameters to be passed to \code{\link[graphics:barplot]{graphics::barplot()}}.} +\item{groups}{A \code{vector} of grouping elements, as long as the variables in +\code{height}. If a single \code{character} string is passed, it must be the name of a +categorical variable from the original dataset. +If set, a matrix of panels defined by \code{groups} will be drawn.} + \item{order}{An \code{\link{integer}} vector giving the index of the column to be used for the ordering of the data.} \item{decreasing}{A \code{\link{logical}} scalar: should the sort order be increasing or decreasing?} -\item{groups}{A \code{vector} of grouping elements, as long as the variables in -\code{height}. If set, a matrix of panels defined by \code{groups} will be drawn.} - \item{horiz}{A \code{\link{logical}} scalar. If \code{FALSE}, the bars are drawn vertically with the first bar to the left. If \code{TRUE} (the default), the bars are drawn horizontally with the first at the bottom.} @@ -78,10 +80,11 @@ barplot(coda, order = 2) ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) -barplot(coda, order = 1) -barplot(coda, order = 1, horiz = FALSE) +## Use ceramic types for grouping +barplot(coda, groups = "type", order = 1) +barplot(coda, groups = "type", order = 1, horiz = FALSE) } \seealso{ Other plot methods: diff --git a/man/condense.Rd b/man/condense.Rd index e581b77..c6c31bc 100644 --- a/man/condense.Rd +++ b/man/condense.Rd @@ -9,14 +9,17 @@ \usage{ condense(x, ...) -\S4method{condense}{CompositionMatrix}(x, by = get_samples(x), ...) +\S4method{condense}{CompositionMatrix}(x, by, ...) } \arguments{ \item{x}{A \code{\linkS4class{CompositionMatrix}} object.} \item{...}{Further arguments to be passed to \code{\link[=mean]{mean()}}.} -\item{by}{A \code{vector} of grouping elements, as long as the variables in \code{x}.} +\item{by}{A \code{vector} of grouping elements, as long as the variables in \code{x} +(in the sense that \code{\link[=as.factor]{as.factor(by)}} defines the grouping). +If a single \code{character} string is passed, it must be the name of a +categorical variable from the original dataset.} } \value{ A \code{\linkS4class{CompositionMatrix}} object. @@ -29,13 +32,10 @@ Splits the data into subsets and computes compositional mean for each. data("slides") ## Coerce to a compositional matrix -coda <- as_composition(slides, sample = 2, group = 1) - -## Compositional mean by sample -condense(coda, by = get_samples(coda)) +coda <- as_composition(slides) ## Compositional mean by group -condense(coda, by = get_groups(coda)) +condense(coda, by = "slide") } \seealso{ \code{\link[=mean]{mean()}}, \code{\link[=aggregate]{aggregate()}} diff --git a/man/describe.Rd b/man/describe.Rd index 6f0f499..a0ae26c 100644 --- a/man/describe.Rd +++ b/man/describe.Rd @@ -22,7 +22,7 @@ Describes an object. data("slides") ## Coerce to compositional data -coda <- as_composition(slides, sample = 1, group = 2) +coda <- as_composition(slides) ## Quick description describe(coda) diff --git a/man/figures/README-groups-1.png b/man/figures/README-groups-1.png deleted file mode 100644 index d1341d3..0000000 Binary files a/man/figures/README-groups-1.png and /dev/null differ diff --git a/man/groups.Rd b/man/groups.Rd index 4f7c2cc..b373b8a 100644 --- a/man/groups.Rd +++ b/man/groups.Rd @@ -3,86 +3,37 @@ \docType{methods} \name{groups} \alias{groups} -\alias{any_assigned} -\alias{any_assigned-method} -\alias{is_assigned} -\alias{is_assigned-method} \alias{get_groups} \alias{get_groups-method} -\alias{set_groups<-} -\alias{set_groups-method} -\alias{is_assigned,CompositionMatrix-method} -\alias{is_assigned,LogRatio-method} -\alias{any_assigned,CompositionMatrix-method} -\alias{any_assigned,LogRatio-method} -\alias{get_groups,CompositionMatrix-method} -\alias{get_groups,LogRatio-method} \alias{get_groups,OutlierIndex-method} -\alias{set_groups<-,CompositionMatrix-method} -\alias{set_groups,CompositionMatrix-method} \title{Working With Groups} \usage{ -any_assigned(x) - -is_assigned(x) - get_groups(x) -set_groups(x) <- value - -\S4method{is_assigned}{CompositionMatrix}(x) - -\S4method{is_assigned}{LogRatio}(x) - -\S4method{any_assigned}{CompositionMatrix}(x) - -\S4method{any_assigned}{LogRatio}(x) - -\S4method{get_groups}{CompositionMatrix}(x) - -\S4method{get_groups}{LogRatio}(x) - \S4method{get_groups}{OutlierIndex}(x) - -\S4method{set_groups}{CompositionMatrix}(x) <- value } \arguments{ \item{x}{An object from which to get or set \code{groups}.} - -\item{value}{A possible value for the \code{groups} of \code{x}.} } \value{ \itemize{ -\item \code{set_groups()} returns an object of the same sort as \code{x} with the new -group names assigned. \item \code{get_groups()} returns a \code{\link{character}} vector giving the group names of \code{x}. -\item \code{any_assigned()} returns a \code{\link{logical}} scalar specifying whether or not \code{x} -has groups. -\item \code{is_assigned()} returns a \code{\link{logical}} vector specifying whether or not an -observation belongs to a group. } } \description{ Retrieves or defines the groups to which the observations belong. } -\details{ -See \code{vignette("nexus")}. -} \examples{ ## Data from Aitchison 1986 data("slides") head(slides) ## Coerce to compositional data -coda <- as_composition(slides, samples = 2, groups = 1) +coda <- as_composition(slides) head(as_features(coda)) - -get_samples(coda) -get_groups(coda) } \seealso{ Other mutators: -\code{\link{samples}}, \code{\link{split}()}, \code{\link{subset}()}, \code{\link{totals}} diff --git a/man/nexus-package.Rd b/man/nexus-package.Rd index 9b38d48..c8ce690 100644 --- a/man/nexus-package.Rd +++ b/man/nexus-package.Rd @@ -23,9 +23,6 @@ Exploration and analysis of compositional data in the framework of Aitchison (19 \code{nexus} uses the following \code{\link[=options]{options()}} to configure behavior: \itemize{ -\item \code{nexus.autodetect}: a \code{\link{logical}} scalar. Try to automatically assign -values to the corresponding slot of a \code{CompositionMatrix} object when -coercing a \code{data.frame}? Defaults to \code{TRUE}. \item \code{nexus.verbose}: a \code{\link{logical}} scalar. Should \R report extra information on progress? Defaults to \code{TRUE}. } diff --git a/man/outliers.Rd b/man/outliers.Rd index d8b8f04..8ef7ef9 100644 --- a/man/outliers.Rd +++ b/man/outliers.Rd @@ -12,7 +12,7 @@ outliers(object, ...) \S4method{outliers}{CompositionMatrix}( object, ..., - groups = get_groups(object), + groups = NULL, method = c("mve", "mcd"), quantile = 0.975 ) @@ -23,7 +23,9 @@ outliers(object, ...) \item{...}{Extra parameters to be passed to \code{\link[MASS:cov.rob]{MASS::cov.rob()}}. Only used if \code{robust} is \code{TRUE}.} -\item{groups}{A \code{vector} of grouping elements, as long as the variables in \code{object}.} +\item{groups}{A \code{vector} of grouping elements, as long as the variables in +\code{object}. If a single \code{character} string is passed, it must be the name of a +categorical variable from the original dataset.} \item{method}{A \code{\link{character}} string specifying the method to be used. It must be one of "\code{mve}" (minimum volume ellipsoid) or "\code{mcd}" (minimum @@ -60,7 +62,7 @@ why a particular threshold should be applicable to all data sets ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Detect outliers out <- outliers(coda, groups = NULL, method = "mcd") @@ -70,7 +72,7 @@ plot(out, type = "distance") plot(out, type = "qqplot") ## Detect outliers by group -out <- outliers(coda[, 1:15], method = "mcd") +out <- outliers(coda[, 1:15], groups = "type", method = "mcd") plot(out, type = "dotchart", select = 1, robust = FALSE) plot(out, type = "dotchart", select = 2, robust = FALSE) diff --git a/man/pca.Rd b/man/pca.Rd index 3a58d5a..1f98ccc 100644 --- a/man/pca.Rd +++ b/man/pca.Rd @@ -65,7 +65,7 @@ decomposition. ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Centered log-ratio clr <- transform_clr(coda) @@ -74,7 +74,7 @@ clr <- transform_clr(coda) X <- pca(clr, scale = FALSE) ## Explore results -viz_individuals(X, highlight = get_groups(coda), pch = 16) +viz_individuals(X, highlight = "type", pch = 16) viz_variables(X) } \references{ diff --git a/man/plot.Rd b/man/plot.Rd index 1a4a4f0..df4ee3e 100644 --- a/man/plot.Rd +++ b/man/plot.Rd @@ -6,7 +6,7 @@ \alias{plot,CompositionMatrix,missing-method} \title{Plot Compositional Data} \usage{ -\S4method{plot}{CompositionMatrix,missing}(x, ..., margin = NULL, groups = get_groups(x)) +\S4method{plot}{CompositionMatrix,missing}(x, ..., margin = NULL, groups = NULL) } \arguments{ \item{x}{A \code{\linkS4class{CompositionMatrix}} object.} @@ -18,7 +18,9 @@ the column to be used as the third part of the ternary plots. If \code{NULL} (the default), marginal compositions will be used (i.e. the geometric mean of the non-selected parts).} -\item{groups}{A \code{vector} of grouping elements, as long as the variables in \code{x}.} +\item{groups}{A \code{vector} of grouping elements, as long as the variables in +\code{x}. If a single \code{character} string is passed, it must be the name of a +categorical variable from the original dataset.} } \value{ \code{plot()} is called for its side-effects: is results in a graphic being @@ -31,9 +33,9 @@ Displays a matrix of ternary plots. ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping -plot(coda[, 1:6, drop = FALSE]) +plot(coda[, 1:6, drop = FALSE], groups = "type") plot(coda[, 1:6, drop = FALSE], groups = NULL) } \seealso{ diff --git a/man/plot_logratio.Rd b/man/plot_logratio.Rd index 60729a2..1699c6a 100644 --- a/man/plot_logratio.Rd +++ b/man/plot_logratio.Rd @@ -9,7 +9,7 @@ \S4method{plot}{LogRatio,missing}( x, ..., - groups = get_groups(x), + groups = NULL, rug = TRUE, ticksize = 0.05, ncol = NULL, @@ -29,7 +29,9 @@ \item{...}{Further \link[graphics:par]{graphical parameters}, particularly, \code{border} and \code{col}.} -\item{groups}{A \code{vector} of grouping elements, as long as the variables in \code{x}. +\item{groups}{A \code{vector} of grouping elements, as long as the variables in +\code{x}. If a single \code{character} string is passed, it must be the name of a +categorical variable from the original dataset. If set, a matrix of panels defined by \code{groups} will be drawn.} \item{rug}{A \code{\link{logical}} scalar: should a \emph{rug} representation (1-d plot) of @@ -72,12 +74,12 @@ Displays a density plot. ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Log ratio clr <- transform_clr(coda) plot(clr, groups = NULL, flip = TRUE, border = "black", col = NA) -plot(clr, flip = TRUE) +plot(clr, groups = "type", flip = TRUE) } \seealso{ Other plot methods: diff --git a/man/plot_outliers.Rd b/man/plot_outliers.Rd index 30c922c..53bea26 100644 --- a/man/plot_outliers.Rd +++ b/man/plot_outliers.Rd @@ -88,7 +88,7 @@ Plot Outliers ## Data from Day et al. 2011 data("kommos", package = "folio") # Coerce to compositional data kommos <- remove_NA(kommos, margin = 1) # Remove cases with missing values -coda <- as_composition(kommos, groups = 1) # Use ceramic types for grouping +coda <- as_composition(kommos) # Use ceramic types for grouping ## Detect outliers out <- outliers(coda, groups = NULL, method = "mcd") @@ -98,7 +98,7 @@ plot(out, type = "distance") plot(out, type = "qqplot") ## Detect outliers by group -out <- outliers(coda[, 1:15], method = "mcd") +out <- outliers(coda[, 1:15], groups = "type", method = "mcd") plot(out, type = "dotchart", select = 1, robust = FALSE) plot(out, type = "dotchart", select = 2, robust = FALSE) diff --git a/man/samples.Rd b/man/samples.Rd deleted file mode 100644 index d1ca222..0000000 --- a/man/samples.Rd +++ /dev/null @@ -1,104 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/mutators.R -\docType{methods} -\name{samples} -\alias{samples} -\alias{any_replicated} -\alias{any_replicated-method} -\alias{is_replicated} -\alias{is_replicated-method} -\alias{get_samples} -\alias{get_samples-method} -\alias{set_samples<-} -\alias{set_samples-method} -\alias{is_replicated,CompositionMatrix-method} -\alias{is_replicated,LogRatio-method} -\alias{is_replicated,OutlierIndex-method} -\alias{any_replicated,CompositionMatrix-method} -\alias{any_replicated,LogRatio-method} -\alias{any_replicated,OutlierIndex-method} -\alias{get_samples,CompositionMatrix-method} -\alias{get_samples,LogRatio-method} -\alias{get_samples,OutlierIndex-method} -\alias{set_samples<-,CompositionMatrix-method} -\alias{set_samples,CompositionMatrix-method} -\title{Working With Samples} -\usage{ -any_replicated(x) - -is_replicated(x) - -get_samples(x) - -set_samples(x) <- value - -\S4method{is_replicated}{CompositionMatrix}(x) - -\S4method{is_replicated}{LogRatio}(x) - -\S4method{is_replicated}{OutlierIndex}(x) - -\S4method{any_replicated}{CompositionMatrix}(x) - -\S4method{any_replicated}{LogRatio}(x) - -\S4method{any_replicated}{OutlierIndex}(x) - -\S4method{get_samples}{CompositionMatrix}(x) - -\S4method{get_samples}{LogRatio}(x) - -\S4method{get_samples}{OutlierIndex}(x) - -\S4method{set_samples}{CompositionMatrix}(x) <- value -} -\arguments{ -\item{x}{An object from which to get or set \code{samples}.} - -\item{value}{A possible value for the \code{samples} of \code{x}.} -} -\value{ -\itemize{ -\item \code{set_samples()} returns an object of the same sort as \code{x} with the new -sample names assigned. -\item \code{get_samples()} returns a \code{\link{character}} vector giving the sample names of \code{x}. -\item \code{any_replicated()} returns a \code{\link{logical}} scalar specifying whether or not -\code{x} has replicated observations. -\item \code{is_replicated()} returns a \code{\link{logical}} vector specifying whether or not -an observation is a replicate. -} -} -\description{ -Retrieves or defines the sample names. -} -\details{ -In some situations, measurements may have been repeated (e.g. multiple -chemical analyses on the same sample). The presence of repeated -measurements can be specified by giving several observations the same -sample name. - -See \code{vignette("nexus")}. -} -\examples{ -## Data from Aitchison 1986 -data("slides") -head(slides) - -## Coerce to compositional data -coda <- as_composition(slides, samples = 2, groups = 1) -head(as_features(coda)) - -get_samples(coda) -get_groups(coda) -} -\seealso{ -Other mutators: -\code{\link{groups}}, -\code{\link{split}()}, -\code{\link{subset}()}, -\code{\link{totals}} -} -\author{ -N. Frerebeau -} -\concept{mutators} diff --git a/man/split.Rd b/man/split.Rd index ae05775..f13f93a 100644 --- a/man/split.Rd +++ b/man/split.Rd @@ -42,15 +42,11 @@ X <- data.frame( Y <- as_composition(X) ## Split by group -split(Y, f = get_groups(Y)) - -## Split by sample -split(Y, f = get_samples(Y)) +split(Y, f = X$groups) } \seealso{ Other mutators: \code{\link{groups}}, -\code{\link{samples}}, \code{\link{subset}()}, \code{\link{totals}} } diff --git a/man/subset.Rd b/man/subset.Rd index b613ef3..c52bb97 100644 --- a/man/subset.Rd +++ b/man/subset.Rd @@ -81,7 +81,6 @@ head(subcoda) \seealso{ Other mutators: \code{\link{groups}}, -\code{\link{samples}}, \code{\link{split}()}, \code{\link{totals}} } diff --git a/man/totals.Rd b/man/totals.Rd index 94e1aac..e7e3b0c 100644 --- a/man/totals.Rd +++ b/man/totals.Rd @@ -8,6 +8,7 @@ \alias{set_totals<-} \alias{set_totals-method} \alias{get_totals,CompositionMatrix-method} +\alias{get_totals,LogRatio-method} \alias{set_totals<-,CompositionMatrix-method} \alias{set_totals,CompositionMatrix-method} \title{Row Sums} @@ -18,6 +19,8 @@ set_totals(x) <- value \S4method{get_totals}{CompositionMatrix}(x) +\S4method{get_totals}{LogRatio}(x) + \S4method{set_totals}{CompositionMatrix}(x) <- value } \arguments{ @@ -55,7 +58,6 @@ head(X) \seealso{ Other mutators: \code{\link{groups}}, -\code{\link{samples}}, \code{\link{split}()}, \code{\link{subset}()} } diff --git a/vignettes/nexus.Rmd b/vignettes/nexus.Rmd index 478efd4..9869b3c 100644 --- a/vignettes/nexus.Rmd +++ b/vignettes/nexus.Rmd @@ -62,100 +62,57 @@ counts <- as_amounts(coda) all.equal(hongite, as.data.frame(counts)) ``` + + ```{r plot, fig.width=7, fig.height=5, out.width='100%'} ## Ternary plots plot(coda) ``` -```{r barplot, fig.width=7, fig.height=5, out.width='100%'} -## Compositional bar plot -barplot(coda, order = "A") -``` - -## Working with (reference) groups +# Working with (reference) groups Provenance studies typically rely on two approaches, which can be used together: * Identification of groups among the artifacts being studied, based on mineralogical or geochemical criteria (*clustering*). * Comparison with so-called reference groups, i.e. known geological sources or productive contexts (*classification*). -**nexus** allows to specify whether an observation belongs to a specific group (or not). When coercing a `data.frame` to a `CompositionMatrix` object, an attempt is made to automatically detect groups by mapping column names. - -```{r} -## Create a data.frame -X <- data.frame( - groups = c("A", "A", "B", "A", "B", "C", "C", "C", "B"), - Ca = c(7.72, 7.32, 3.11, 7.19, 7.41, 5, 4.18, 1, 4.51), - Fe = c(6.12, 5.88, 5.12, 6.18, 6.02, 7.14, 5.25, 5.28, 5.72), - Na = c(0.97, 1.59, 1.25, 0.86, 0.76, 0.51, 0.75, 0.52, 0.56) -) - -## Coerce to a compositional matrix -Y <- as_composition(X) -any_assigned(Y) -``` - -This behavior can be disabled by setting `options(nexus.autodetect = FALSE)` or overridden by explicitly specifying the column to be used with the `groups` argument of `as_composition()`. +**nexus** allows to specify whether an observation belongs to a specific group (or not): -`get_groups(x)` and `set_groups(x) <- value` allow to retrieve or set groups of an existing `CompositionMatrix` (`NA` can be used to specify that a sample does not belong to any group): - -```{r} -## Set groups (NA means no group) -set_groups(Y) <- c("X", "X", "Y", "X", "Y", NA, NA, NA, "Y") - -## Retrieve groups -get_groups(Y) -``` - -Once groups have been defined, they can be used by further methods (e.g. plotting). - -## Working with repeated measurements - -In some situations, measurements may have been repeated (e.g. multiple chemical analyses on the same sample). The presence of repeated measurements can be specified by giving several observations the same sample name. - -When coercing a `data.frame` to a `CompositionMatrix` object, an attempt is made to automatically detect samples by mapping column names. If no matching column is found, row names will be used by default. +The `parts` argument of the function `as_composition()` is used to define the columns to be used as the compositional part. If `parts` is `NULL` (the default), all non-integer numeric columns (i.e. of type `double`) are used. In the case of a `data.frame` coercion, additional columns are stored internally. This preserves the information contained in these additional variables (whether qualitative or quantitative). ```{r} ## Create a data.frame X <- data.frame( - samples = c("A", "A", "A", "B", "B", "B", "C", "C", "C"), + type = c("A", "A", "B", "A", "B", "C", "C", "C", "B"), Ca = c(7.72, 7.32, 3.11, 7.19, 7.41, 5, 4.18, 1, 4.51), Fe = c(6.12, 5.88, 5.12, 6.18, 6.02, 7.14, 5.25, 5.28, 5.72), Na = c(0.97, 1.59, 1.25, 0.86, 0.76, 0.51, 0.75, 0.52, 0.56) ) ## Coerce to a compositional matrix +## (the 'type' column will be preserved) Y <- as_composition(X) -any_replicated(Y) ``` -This behavior can be disabled by setting `options(nexus.autodetect = FALSE)` or overridden by explicitly specifying the column to be used with the `samples` argument of `as_composition()`. +These additional variables can be used by further methods (e.g. through the `groups` argument of the plotting functions): -`get_samples(x)` and `set_samples(x) <- value` allow to retrieve or set sample names of an existing `CompositionMatrix` (missing values are not allowed): - -```{r} -## Set sample names -set_samples(Y) <- c("A", "B", "C", "D", "E", "F", "G", "H", "I") - -## Retrieve groups -get_samples(Y) +```{r barplot, fig.width=7, fig.height=5, out.width='100%'} +## Compositional bar plot +barplot(Y, groups = "type", order = "Ca") ``` -Note that the presence of repeated measurements may affect some calculations (read the documentation carefully). - # Log-ratio transformations -The package provides the following transformations: centered log ratio (*CLR*, Aitchison 1986), additive log ratio (*ALR*, Aitchison 1986), isometric log ratio (*ILR*, Egozcue et al. 2003) and pivot log-ratio (*PLR*, Hron et al. 2017). +The package provides the following (inverse) transformations: centered log ratio (*CLR*, Aitchison 1986), additive log ratio (*ALR*, Aitchison 1986), isometric log ratio (*ILR*, Egozcue et al. 2003) and pivot log-ratio (*PLR*, Hron et al. 2017). ```{r transform, fig.width=7, fig.height=8, out.width='100%'} ## CLR clr <- transform_clr(coda) -head(clr) - -plot(clr) +## Back transform back <- transform_inverse(clr) -head(back) + +all.equal(back, coda) ``` # Multivariate methods @@ -168,39 +125,43 @@ data("kommos", package = "folio") kommos <- remove_NA(kommos, margin = 1) ## Coerce to a compositional matrix -coda <- as_composition(kommos) - -## Set groups -set_groups(coda) <- kommos$type +coda <- as_composition(kommos, parts = 3:22) ``` ```{r ceramics-barplot, fig.width=7, fig.height=7, out.width='100%'} ## Compositional bar plot -barplot(coda, order = "Ca") +barplot(coda, groups = "type", order = "Ca") ``` ## Principle Component Analysis -```{r ceramics-pca, fig.width=7, fig.height=7, out.width='50%', fig.show='hold'} +```{r pca, fig.width=7, fig.height=7, out.width='50%', fig.show='hold'} ## CLR clr <- transform_clr(coda) ## PCA clr_pca <- pca(clr, scale = FALSE) -viz_individuals(clr_pca, highlight = get_groups(coda), pch = 16, - col = c("#EE7733", "#0077BB", "#33BBEE", "#EE3377")) +## Visualize results +viz_individuals( + x = clr_pca, + highlight = "type", + pch = 16, + col = c("#EE7733", "#0077BB", "#33BBEE", "#EE3377") +) viz_variables(clr_pca) ``` ## MANOVA -```{r ceramics-manova} +```{r manova} ## ILR ilr <- transform_ilr(coda) +kommos$ilr <- ilr + ## MANOVA -fit <- manova(ilr ~ get_groups(ilr)) +fit <- manova(ilr ~ type, data = kommos) summary(fit) ``` @@ -208,9 +169,9 @@ The MANOVA results suggest that there are statistically significant differences ## Discriminant Analysis -```{r ceramics-lda, fig.width=7, fig.height=7, out.width='100%'} +```{r lda, fig.width=7, fig.height=7, out.width='100%'} ## LDA -discr <- MASS::lda(ilr, grouping = get_groups(ilr)) +discr <- MASS::lda(type ~ ilr, data = kommos) plot(discr) ## Back transform results