Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor internal helpers which generate fragmented messages #6534

Merged
merged 3 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 14 additions & 21 deletions R/fmelt.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,11 @@ measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") {
}
fun.list[[fun.i]] = fun
}
measurev.args = c(
list(fun.list),
L[formal.i.vec],
list(group.desc="... arguments to measure"))
measurev.args = c(list(fun.list), L[formal.i.vec])
do.call(measurev, measurev.args)
}

measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name", group.desc="elements of fun.list"){
measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name"){
# 1. basic error checking.
if (!missing(sep) && !missing(pattern)) {
stopf("both sep and pattern arguments used; must use either sep or pattern (not both)")
Expand All @@ -88,21 +85,11 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
which(!nzchar(names(fun.list)))
}
if (length(prob.i)) {
stopf("in measurev, %s must be named, problems: %s", group.desc, brackify(prob.i))
stopf("in measurev, elements of fun.list must be named, problems: %s", brackify(prob.i))
}
err.names.unique = function(err.what, name.vec) {
name.tab = table(name.vec)
bad.counts = name.tab[1 < name.tab]
if (length(bad.counts)) {
stopf("%s should be uniquely named, problems: %s", err.what, brackify(names(bad.counts)))
}
}
err.args.groups = function(type, N){
if (N != length(fun.list)) {
stopf("number of %s =%d must be same as %s =%d", group.desc, length(fun.list), type, N)
}
if (length(dup.funs <- duplicated_values(names(fun.list)))) {
stopf("elements of fun.list should be uniquely named, problems: %s", brackify(dup.funs))
}
err.names.unique(group.desc, names(fun.list))
# 2. compute initial group data table, used as variable_table attribute.
group.mat = if (!missing(pattern)) {
if (!is.character(pattern)) {
Expand All @@ -117,7 +104,9 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
if (is.null(start)) {
stopf("pattern must contain at least one capture group (parenthesized sub-pattern)")
}
err.args.groups("number of capture groups in pattern", ncol(start))
if (ncol(start) != length(fun.list)) {
stopf("number of elements of fun.list (%d) must be the same as the number of capture groups in pattern (%d)", length(fun.list), ncol(start))
}
end = attr(match.vec, "capture.length")[measure.vec.i,]+start-1L
measure.vec <- cols[measure.vec.i]
names.mat = matrix(measure.vec, nrow(start), ncol(start))
Expand All @@ -132,12 +121,16 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
if (n.groups == 1) {
stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
}
err.args.groups("max number of items after splitting column names", n.groups)
if (n.groups != length(fun.list)) {
stopf("number of elements of fun.list (%d) must be the same as the max number of items after splitting column names (%d)", length(fun.list), n.groups)
}
measure.vec.i = which(vector.lengths==n.groups)
measure.vec = cols[measure.vec.i]
do.call(rbind, list.of.vectors[measure.vec.i])
}
err.names.unique("measured columns", measure.vec)
if (length(dup.measures <- duplicated_values(measure.vec))) {
stopf("measured columns should be uniquely named, problems: %s", brackify(dup.measures))
}
uniq.mat = unique(group.mat)
if (nrow(uniq.mat) < nrow(group.mat)) {
stopf("number of unique column IDs =%d is less than number of melted columns =%d; fix by changing pattern/sep", nrow(uniq.mat), nrow(group.mat))
Expand Down
10 changes: 8 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,17 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
if (!anyDuplicated(nm <- names(x))) return(invisible())
duplicate_names = unique(nm[duplicated(nm)])
stopf(ngettext(length(duplicate_names),
"%s has duplicated column name %s. Please remove or rename the duplicate and try again.",
"%s has duplicated column names %s. Please remove or rename the duplicates and try again."),
"%s has duplicated column name %s. Please remove or rename the duplicate and try again.",
"%s has duplicated column names %s. Please remove or rename the duplicates and try again."),
table_name, brackify(duplicate_names), domain=NA)
}

duplicated_values = function(x) {
# fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case
if (!anyDuplicated(x)) return(vector(typeof(x)))
unique(x[duplicated(x)])
}

# TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need.
# Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper.
# This also impacts test 2 in S4.Rraw, because the error message differs for rep.int() vs. rep_len().
Expand Down
10 changes: 5 additions & 5 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -17301,11 +17301,11 @@ test(2183.00020, melt(iris.dt, measure.vars=measurev(value.name, dim, sep=".", p
test(2183.000201, melt(iris.dt, measure.vars=measurev(list(NULL, dim=NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1]")
test(2183.000202, melt(iris.dt, measure.vars=measurev(list(NULL, NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1, 2]")
test(2183.00027, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim="bar"), sep=".")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: dim")
test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list =3 must be same as max number of items after splitting column names =2")
test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)")
test(2183.00042, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function()1), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr")
test(2183.00043, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=interactive), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr")
test(2183.00044, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function(x)1), pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr")
test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list =3 must be same as number of capture groups in pattern =2")
test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)")
test(2183.00048, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, value.name=NULL), sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]")
# measure with factor conversion.
myfac = function(x)factor(x)#user-defined conversion function.
Expand Down Expand Up @@ -17348,7 +17348,7 @@ test(2183.24, names(melt(iris.dt, measure.vars=measure(value.name, dim, sep=".")
test(2183.25, names(melt(iris.dt, measure.vars=measure(part, value.name, sep="."))), c("Species", "part", "Length", "Width"))
test(2183.26, names(melt(iris.dt, measure.vars=measure(part, dim, sep="."))), c("Species", "part", "dim", "value"))
test(2183.27, melt(iris.dt, measure.vars=measure(value.name, dim="bar", sep=".")), error="each ... argument to measure must be a function with at least one argument, problem: dim")
test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of ... arguments to measure =3 must be same as max number of items after splitting column names =2")
test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)")
test(2183.29, melt(iris.dt, measure.vars=measure()), error="each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
# patterns with iris data.
test(2183.40, names(melt(iris.dt, measure.vars=patterns("[.]"))), c("Species", "variable", "value"))
Expand All @@ -17357,10 +17357,10 @@ test(2183.41, melt(DTid, measure.vars=measure(value.name, istr="bar", pattern="(
test(2183.42, melt(DTid, measure.vars=measure(value.name, istr=function()1, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr")
test(2183.43, melt(DTid, measure.vars=measure(value.name, istr=interactive, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr")
test(2183.44, melt(DTid, measure.vars=measure(value.name, istr=function(x)1, pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr")
test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of ... arguments to measure =3 must be same as number of capture groups in pattern =2")
test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)")
test(2183.46, melt(iris.dt, measure.vars=measure(function(x)factor(x), dim, pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]")
test(2183.47, melt(iris.dt, measure.vars=measure(function(x)factor(x), pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]")
test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="... arguments to measure should be uniquely named, problems: [value.name]")
test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]")
# measure with factor conversion.
myfac = function(x)factor(x)#user-defined conversion function.
test(2183.60, melt(DTid, measure.vars=measure(letter=myfac, value.name, pattern="([ab])([12])")), data.table(id=1, letter=factor(c("a","b")), "2"=c(2,2), "1"=c(NA,1)))
Expand Down
4 changes: 1 addition & 3 deletions man/measure.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
}
\usage{
measure(\dots, sep, pattern, cols, multiple.keyword="value.name")
measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name",
group.desc="elements of fun.list")
measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name")
}
\arguments{
\item{\dots}{One or more (1) symbols (without argument name; symbol
Expand All @@ -44,7 +43,6 @@ measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name",
value columns (with names defined by the unique values in that
group). Otherwise if the string not used as a group name, then
measure returns a vector and melt returns a single value column.}
\item{group.desc}{Internal, used in error messages.}
}
\seealso{
\code{\link{melt}},
Expand Down
Loading