Skip to content

Commit

Permalink
Refactor internal helpers which generate fragmented messages (#6534)
Browse files Browse the repository at this point in the history
* Refacator internal helpers which generate fragmented messages

* Update test error strings

* deprecate internal group.desc argument
  • Loading branch information
MichaelChirico authored Sep 24, 2024
1 parent 6abbaaf commit 960ff28
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 31 deletions.
35 changes: 14 additions & 21 deletions R/fmelt.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,11 @@ measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") {
}
fun.list[[fun.i]] = fun
}
measurev.args = c(
list(fun.list),
L[formal.i.vec],
list(group.desc="... arguments to measure"))
measurev.args = c(list(fun.list), L[formal.i.vec])
do.call(measurev, measurev.args)
}

measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name", group.desc="elements of fun.list"){
measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name"){
# 1. basic error checking.
if (!missing(sep) && !missing(pattern)) {
stopf("both sep and pattern arguments used; must use either sep or pattern (not both)")
Expand All @@ -88,21 +85,11 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
which(!nzchar(names(fun.list)))
}
if (length(prob.i)) {
stopf("in measurev, %s must be named, problems: %s", group.desc, brackify(prob.i))
stopf("in measurev, elements of fun.list must be named, problems: %s", brackify(prob.i))
}
err.names.unique = function(err.what, name.vec) {
name.tab = table(name.vec)
bad.counts = name.tab[1 < name.tab]
if (length(bad.counts)) {
stopf("%s should be uniquely named, problems: %s", err.what, brackify(names(bad.counts)))
}
}
err.args.groups = function(type, N){
if (N != length(fun.list)) {
stopf("number of %s =%d must be same as %s =%d", group.desc, length(fun.list), type, N)
}
if (length(dup.funs <- duplicated_values(names(fun.list)))) {
stopf("elements of fun.list should be uniquely named, problems: %s", brackify(dup.funs))
}
err.names.unique(group.desc, names(fun.list))
# 2. compute initial group data table, used as variable_table attribute.
group.mat = if (!missing(pattern)) {
if (!is.character(pattern)) {
Expand All @@ -117,7 +104,9 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
if (is.null(start)) {
stopf("pattern must contain at least one capture group (parenthesized sub-pattern)")
}
err.args.groups("number of capture groups in pattern", ncol(start))
if (ncol(start) != length(fun.list)) {
stopf("number of elements of fun.list (%d) must be the same as the number of capture groups in pattern (%d)", length(fun.list), ncol(start))
}
end = attr(match.vec, "capture.length")[measure.vec.i,]+start-1L
measure.vec <- cols[measure.vec.i]
names.mat = matrix(measure.vec, nrow(start), ncol(start))
Expand All @@ -132,12 +121,16 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
if (n.groups == 1) {
stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
}
err.args.groups("max number of items after splitting column names", n.groups)
if (n.groups != length(fun.list)) {
stopf("number of elements of fun.list (%d) must be the same as the max number of items after splitting column names (%d)", length(fun.list), n.groups)
}
measure.vec.i = which(vector.lengths==n.groups)
measure.vec = cols[measure.vec.i]
do.call(rbind, list.of.vectors[measure.vec.i])
}
err.names.unique("measured columns", measure.vec)
if (length(dup.measures <- duplicated_values(measure.vec))) {
stopf("measured columns should be uniquely named, problems: %s", brackify(dup.measures))
}
uniq.mat = unique(group.mat)
if (nrow(uniq.mat) < nrow(group.mat)) {
stopf("number of unique column IDs =%d is less than number of melted columns =%d; fix by changing pattern/sep", nrow(uniq.mat), nrow(group.mat))
Expand Down
10 changes: 8 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,17 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
if (!anyDuplicated(nm <- names(x))) return(invisible())
duplicate_names = unique(nm[duplicated(nm)])
stopf(ngettext(length(duplicate_names),
"%s has duplicated column name %s. Please remove or rename the duplicate and try again.",
"%s has duplicated column names %s. Please remove or rename the duplicates and try again."),
"%s has duplicated column name %s. Please remove or rename the duplicate and try again.",
"%s has duplicated column names %s. Please remove or rename the duplicates and try again."),
table_name, brackify(duplicate_names), domain=NA)
}

duplicated_values = function(x) {
# fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case
if (!anyDuplicated(x)) return(vector(typeof(x)))
unique(x[duplicated(x)])
}

# TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need.
# Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper.
# This also impacts test 2 in S4.Rraw, because the error message differs for rep.int() vs. rep_len().
Expand Down
10 changes: 5 additions & 5 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -17301,11 +17301,11 @@ test(2183.00020, melt(iris.dt, measure.vars=measurev(value.name, dim, sep=".", p
test(2183.000201, melt(iris.dt, measure.vars=measurev(list(NULL, dim=NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1]")
test(2183.000202, melt(iris.dt, measure.vars=measurev(list(NULL, NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1, 2]")
test(2183.00027, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim="bar"), sep=".")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: dim")
test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list =3 must be same as max number of items after splitting column names =2")
test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)")
test(2183.00042, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function()1), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr")
test(2183.00043, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=interactive), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr")
test(2183.00044, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function(x)1), pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr")
test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list =3 must be same as number of capture groups in pattern =2")
test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)")
test(2183.00048, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, value.name=NULL), sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]")
# measure with factor conversion.
myfac = function(x)factor(x)#user-defined conversion function.
Expand Down Expand Up @@ -17348,7 +17348,7 @@ test(2183.24, names(melt(iris.dt, measure.vars=measure(value.name, dim, sep=".")
test(2183.25, names(melt(iris.dt, measure.vars=measure(part, value.name, sep="."))), c("Species", "part", "Length", "Width"))
test(2183.26, names(melt(iris.dt, measure.vars=measure(part, dim, sep="."))), c("Species", "part", "dim", "value"))
test(2183.27, melt(iris.dt, measure.vars=measure(value.name, dim="bar", sep=".")), error="each ... argument to measure must be a function with at least one argument, problem: dim")
test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of ... arguments to measure =3 must be same as max number of items after splitting column names =2")
test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)")
test(2183.29, melt(iris.dt, measure.vars=measure()), error="each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
# patterns with iris data.
test(2183.40, names(melt(iris.dt, measure.vars=patterns("[.]"))), c("Species", "variable", "value"))
Expand All @@ -17357,10 +17357,10 @@ test(2183.41, melt(DTid, measure.vars=measure(value.name, istr="bar", pattern="(
test(2183.42, melt(DTid, measure.vars=measure(value.name, istr=function()1, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr")
test(2183.43, melt(DTid, measure.vars=measure(value.name, istr=interactive, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr")
test(2183.44, melt(DTid, measure.vars=measure(value.name, istr=function(x)1, pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr")
test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of ... arguments to measure =3 must be same as number of capture groups in pattern =2")
test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)")
test(2183.46, melt(iris.dt, measure.vars=measure(function(x)factor(x), dim, pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]")
test(2183.47, melt(iris.dt, measure.vars=measure(function(x)factor(x), pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]")
test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="... arguments to measure should be uniquely named, problems: [value.name]")
test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]")
# measure with factor conversion.
myfac = function(x)factor(x)#user-defined conversion function.
test(2183.60, melt(DTid, measure.vars=measure(letter=myfac, value.name, pattern="([ab])([12])")), data.table(id=1, letter=factor(c("a","b")), "2"=c(2,2), "1"=c(NA,1)))
Expand Down
4 changes: 1 addition & 3 deletions man/measure.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
}
\usage{
measure(\dots, sep, pattern, cols, multiple.keyword="value.name")
measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name",
group.desc="elements of fun.list")
measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name")
}
\arguments{
\item{\dots}{One or more (1) symbols (without argument name; symbol
Expand All @@ -44,7 +43,6 @@ measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name",
value columns (with names defined by the unique values in that
group). Otherwise if the string not used as a group name, then
measure returns a vector and melt returns a single value column.}
\item{group.desc}{Internal, used in error messages.}
}
\seealso{
\code{\link{melt}},
Expand Down

0 comments on commit 960ff28

Please sign in to comment.