merge.data.table: name the offending columns when `by`, `by.x` or `by.y`
refer to columns that do not exist; an empty list is printed as "(none)".
man/setkey.Rd: add setindexv() usage examples to the `cols` argument,
kept verbatim via \preformatted{} so they render as code.

diff --git a/R/merge.R b/R/merge.R
index ab93d5498..758269a71 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -35,10 +35,14 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   if (!is.null(by.x)) {
     if (length(by.x)==0L || !is.character(by.x) || !is.character(by.y))
       stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
-    if (!all(by.x %chin% nm_x))
-      stopf("Elements listed in `by.x` must be valid column names in x.")
-    if (!all(by.y %chin% nm_y))
-      stopf("Elements listed in `by.y` must be valid column names in y.")
+    if (!all(by.x %chin% nm_x)) {
+      missing_by_x = setdiff(by.x, nm_x)
+      stopf("Elements listed in `by.x` must be valid column names in x. Missing: %s", paste(missing_by_x, collapse = ", "))
+    }
+    if (!all(by.y %chin% nm_y)) {
+      missing_by_y = setdiff(by.y, nm_y)
+      stopf("Elements listed in `by.y` must be valid column names in y. Missing: %s", paste(missing_by_y, collapse = ", "))
+    }
     by = by.x
     names(by) = by.y
   } else {
@@ -50,8 +54,12 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
       by = intersect(nm_x, nm_y)
     if (length(by) == 0L || !is.character(by))
       stopf("A non-empty vector of column names for `by` is required.")
-    if (!all(by %chin% intersect(nm_x, nm_y)))
-      stopf("Elements listed in `by` must be valid column names in x and y")
+    if (!all(by %chin% intersect(nm_x, nm_y))) {
+      # Print "(none)" instead of an empty string for a table that has every `by` column.
+      fmt_missing = function(cols) if (length(cols)) paste(cols, collapse = ", ") else "(none)"
+      stopf("Elements listed in `by` must be valid column names in x and y. Missing in x: %s. Missing in y: %s",
+            fmt_missing(setdiff(by, nm_x)), fmt_missing(setdiff(by, nm_y)))
+    }
     by = unname(by)
     by.x = by.y = by
   }
diff --git a/man/setkey.Rd b/man/setkey.Rd
index 378f1a106..82e584ca3 100644
--- a/man/setkey.Rd
+++ b/man/setkey.Rd
@@ -55,7 +55,18 @@ haskey(x)
 \arguments{
 \item{x}{ A \code{data.table}. }
 \item{\dots}{ The columns to sort by. Do not quote the column names. If \code{\dots} is missing (i.e. \code{setkey(DT)}), all the columns are used. \code{NULL} removes the key. }
-\item{cols}{ A character vector of column names. For \code{setindexv}, this can be a \code{list} of character vectors, in which case each element will be applied as an index in turn. }
+\item{cols}{ A character vector of column names. For \code{setindexv}, this can be a \code{list} of character vectors, in which case each element will be applied as an index in turn.
+\preformatted{
+# Example: Single character vector of column names
+DT = data.table(A = 5:1, B = letters[5:1], C = 10:6)
+setindexv(DT, "A")  # Set index using a single column
+indices(DT)  # View the indices
+
+# Example: List of character vectors
+setindexv(DT, list(c("A", "B"), c("B", "C")))  # Setting multiple indices
+indices(DT)  # View all indices
+indices(DT, vectors = TRUE)  # View indices as a list of vectors
+}}
 \item{verbose}{ Output status and information. }
 \item{physical}{ \code{TRUE} changes the order of the data in RAM. \code{FALSE} adds an index. }
 \item{vectors}{ \code{logical} scalar, default \code{FALSE}; when set to \code{TRUE}, a \code{list} of character vectors is returned, each referring to one index. }