Skip to content

Commit

Permalink
cleanup; added test
Browse files Browse the repository at this point in the history
  • Loading branch information
mllg committed Aug 15, 2016
1 parent 49bd0d3 commit 4f503e8
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 108 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Depends:
R (>= 3.0.2),
BBmisc (>= 1.10),
ggplot2,
ParamHelpers (>= 1.8),
ParamHelpers (>= 1.9),
stats,
stringi
Imports:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ S3method(getTaskTargetNames,TaskDescUnsupervised)
S3method(getTaskTargets,CostSensTask)
S3method(getTaskTargets,SupervisedTask)
S3method(getTaskTargets,UnsupervisedTask)
S3method(hasExpression,Learner)
S3method(impute,Task)
S3method(impute,data.frame)
S3method(isFailureModel,BaseWrapperModel)
Expand Down
4 changes: 4 additions & 0 deletions R/Learner_properties.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,7 @@ getSupportedLearnerProperties = function(type = NA_character_) {
p[[type]]
}

#' @export
hasExpression.Learner = function(par) {
  # A learner counts as "having expressions" if its parameter set reports
  # one, or if any of its currently set hyperparameter values is an
  # expression object. The second check is only reached when the first
  # one is FALSE.
  in.par.set = any(hasExpression(par$par.set))
  in.par.set || any(vlapply(par$par.vals, is.expression))
}
16 changes: 8 additions & 8 deletions R/Task_operators.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ
checkTask(task, "Task")

if (missing(subset)) {
subset = NULL
subset = NULL
} else {
assert(checkIntegerish(subset), checkLogical(subset))
if (is.logical(subset))
Expand All @@ -266,7 +266,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ
assertLogical(target.extra)

task.features = getTaskFeatureNames(task)

# if supplied check if the input is right and always convert 'features'
# to character vec
if (!missing(features)) {
Expand Down Expand Up @@ -459,13 +459,13 @@ getTaskWeights = function(task) {
# features (p), the number of observations (n), the task type (type) and in
# case of classification tasks the number of class levels (k)
makeTaskDictionary = function(task) {
# Returns a named list with the entries 'task', 'p', 'n' and 'type', plus
# 'k' when the task type is "classif".
# NOTE(review): this view shows two constructions of 'dict' back to back
# (element-wise assignment, then a single list() call). They look like the
# before/after form of the same code; as written, the list() call assigns
# 'dict' last and therefore determines the result -- confirm against the
# actual file.
dict = list()
dict$task = task
dict$p = getTaskNFeats(task)
dict$n = getTaskSize(task)
dict$type = getTaskType(task)
dict = list(
task = task,
p = getTaskNFeats(task),
n = getTaskSize(task),
type = getTaskType(task)
)
# 'k' (number of class levels) is only added for classification tasks
if (dict$type == "classif")
dict$k = length(getTaskClassLevels(task))
# dict$keys = setdiff(c(names(task$task.desc), names(task), "data", names(task$env$data)), names(dict))
return(dict)
}
61 changes: 32 additions & 29 deletions R/evaluateLearner.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
#' @title Evaluates expressions within a learner or parameter set according to the task.
#'
#' @description Updates learners and/or parameter sets by evaluating their expressions
#' based on a specific task. An overview of the possible expressions can be found in the details.
#' @description
#' A \code{\link{Learner}} or \code{\link[ParamHelpers]{ParamSet}} can contain an unevaluated \code{\link[base]{expression}}
#' as value for a hyperparameter.
#' E.g., these expressions are used if the default value depends on the task size or an upper limit for a parameter
#' is given by the number of features in a task.
#' The provided functions evaluate such expressions in an environment (dictionary) which holds the following information:
#' \itemize{
#' \item{\code{task}:} the task itself, allowing to access any of its elements.
#' \item{\code{p}:} the number of features in the task
#' \item{\code{n}:} the number of observations in the task
#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costsens" or "multilabel"
#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
#' }
#' Usually the evaluation of the expression is performed automatically, e.g. in \code{\link{train}} or
#' \code{\link{tuneParams}}.
#' Therefore calling \code{evaluateParamSet} or \code{evaluateLearner} manually should not be necessary.
#'
#' @template arg_learner
#' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr
#' Parameter set of (hyper)parameters and their constraints.
Expand All @@ -11,64 +26,52 @@
#' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}].
#' @name evaluateLearner
#' @rdname evaluateLearner
#' @details The expressions can be based on any information provided by the task. For convenience,
#' the most often used keys are available directly
#' \itemize{
#' \item{\code{task}:} the task itself, allowing to access any of its elements
#' \item{\code{p}:} the number of features in the task
#' \item{\code{n}:} the number of observations in the task
#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel"
#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
#' }
#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could
#' access the "blocking" via \code{task$task.desc$has.blocking}.
#' @export
#' @examples
#' ## (1) evaluation of a learner's hyperparameters
#' task = makeClassifTask(data = iris, target = "Species")
#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p),
#' minbucket = expression(3L + 4L * task$task.desc$has.blocking))
#' lrn2 = evaluateLearner(learner = lrn1, task = task)
#'
#' lrn1$par.vals
#' lrn2$par.vals
#'
#'
#' getHyperPars(lrn1)
#' getHyperPars(lrn2)
#'
#' ## (2) evaluation of a learner's entire parameter set
#' task = makeClassifTask(data = iris, target = "Species")
#' lrn1 = makeLearner("classif.randomForest")
#' lrn2 = evaluateLearner(learner = lrn1, task = task)
#'
#' ## focus on the parameters 'mtry', 'classwt' and 'cutoff'
#' lrn1$par.set
#' lrn2$par.set
#'
#'
#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff'
#' getParamSet(lrn1)
#' getParamSet(lrn2)
#'
#' ## (3) evaluation of a parameter set
#' task = makeClassifTask(data = iris, target = "Species")
#' ps1 = makeParamSet(
#' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x),
#' makeDiscreteParam("sigma", values = expression(list(k, p)))
#' )
#' ps2 = evaluateParset(par.set = ps1, task = task)
#' @export
#' evaluateParset(par.set = ps1, task = task)
evaluateLearner = function(learner, task) {
# dictionary (named list) in which the hyperparameter expressions are evaluated
dict = makeTaskDictionary(task = task)
# resolve expressions inside the learner's parameter set
learner$par.set = evaluateParset(learner$par.set, task = task)
# NOTE(review): the two conditions below look like the old and the new form
# of the same line shown one after the other; as written they nest, so the
# assignment runs only if both hold. Presumably they are equivalent, since
# any() over an empty result is FALSE -- confirm against the actual file.
# When the assignment runs, every entry of par.vals is passed through eval(),
# not only the entries that are expressions.
if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression)))
if (any(vlapply(learner$par.vals, is.expression)))
learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict))
return(learner)
}

#' @rdname evaluateLearner
#' @export
evaluateParset = function(par.set, task) {
dict = makeTaskDictionary(task = task)
if (hasExpression(par = par.set)) {
dict = makeTaskDictionary(task = task)
checkParamSet(par.set = par.set, dict = dict)
par.set = evaluateParamSet(par.set = par.set, dict = dict)
## assure that the value names are also shown if the values list was unnamed
par.set$pars = lapply(par.set$pars, function(x) {
if (is.null(x$values) || !is.null(names(x$values)))
return(x)
names(x$values) = unlist(lapply(x$values, function(vals) vals))
if (!is.null(x$values) && is.null(names(x$values)))
names(x$values) = unlist(x$values)
return(x)
})
}
Expand Down
12 changes: 1 addition & 11 deletions R/makeLearner.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,7 @@
#' @return [\code{\link{Learner}}].
#' @family learner
#' @export
#' @details Note that learners can also contain task dependent expressions, which can be based on any
#' information provided by the task. For convenience, the most often used keys are available directly
#' \itemize{
#' \item{\code{task}:} the task itself, allowing to access any of its elements
#' \item{\code{p}:} the number of features in the task
#' \item{\code{n}:} the number of observations in the task
#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel"
#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
#' }
#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could
#' access the "blocking" via \code{task$task.desc$has.blocking}.
#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information.
#' @aliases Learner
#' @seealso [\code{\link{resample}}], [\code{\link{predict.WrappedModel}}]
#' @examples
Expand Down
20 changes: 5 additions & 15 deletions R/setHyperPars.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,7 @@
#' @note If a named (hyper)parameter can't be found for the given learner, the 3
#' closest (hyper)parameter names will be output in case the user mistyped.
#' @export
#' @details Note that learners can also contain task dependent expressions, which can be based on any
#' information provided by the task. For convenience, the most often used keys are available directly
#' \itemize{
#' \item{\code{task}:} the task itself, allowing to access any of its elements
#' \item{\code{p}:} the number of features in the task
#' \item{\code{n}:} the number of observations in the task
#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel"
#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
#' }
#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could
#' access the "blocking" via \code{task$task.desc$has.blocking}.
#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information.
#' @family learner
#' @importFrom utils adist
#' @examples
Expand Down Expand Up @@ -70,14 +60,14 @@ setHyperPars2.Learner = function(learner, par.vals) {
indices = order(adist(n, parnames))[1:3]
possibles = na.omit(parnames[indices])
if (length(possibles) > 0) {
messagef("%s: couldn't find hyperparameter '%s'\nDid you mean one of these hyperparameters instead: %s",
messagef("%s: couldn't find hyperparameter '%s'\nDid you mean one of these hyperparameters instead: %s",
learner$id, n, stri_flatten(possibles, collapse = " "))
}

# no description: stop warn or quiet
msg = sprintf("%s: Setting parameter %s without available description object!\nYou can switch off this check by using configureMlr!",
msg = sprintf("%s: Setting parameter %s without available description object!\nYou can switch off this check by using configureMlr!",
learner$id, n)

if (on.par.without.desc == "stop") {
stop(msg)
} else if (on.par.without.desc == "warn") {
Expand Down
2 changes: 1 addition & 1 deletion R/train.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' print(mod)
train = function(learner, task, subset, weights = NULL) {
learner = checkLearner(learner)
if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression)))
if (hasExpression(learner))
learner = evaluateLearner(learner = learner, task = task)
assertClass(task, classes = "Task")
if (missing(subset)) {
Expand Down
2 changes: 1 addition & 1 deletion R/tuneParams.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) {
learner = checkLearner(learner)
assertClass(task, classes = "Task")
if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression)))
if (hasExpression(learner))
learner = evaluateLearner(learner = learner, task = task)
measures = checkMeasures(measures, learner)
assertClass(par.set, classes = "ParamSet")
Expand Down
30 changes: 15 additions & 15 deletions man/evaluateLearner.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 2 additions & 12 deletions man/makeLearner.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 2 additions & 13 deletions man/setHyperPars.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 15 additions & 2 deletions tests/testthat/test_base_evaluateLearner.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ test_that("expressions in learners", {
expect_true(is.expression(x1))
expect_true(!is.expression(x2))
expect_equal(x2, binaryclass.task$task.desc$has.blocking)

## (4) expressions within hyperparameters
x1 = lrn1$par.vals$minsplit
x2 = lrn2$par.vals$minsplit
Expand All @@ -48,9 +48,22 @@ test_that("expressions in parameter sets", {
makeDiscreteParam("sigma", values = expression(list(p, k)))
)
ps2 = evaluateParset(par.set = ps1, task = binaryclass.task)

## expressions within parameter sets
expect_equal(ps2$pars$C$lower, 2L)
expect_equal(ps2$pars$C$upper, 208L)
expect_equal(ps2$pars$sigma$values, list("60" = 60, "2" = 2))
})

test_that("tuning works with expressions", {
  # The upper bound of 'fw.abs' is given as an expression in 'n'; every tuned
  # value has to lie in [1, ceiling(n/2)] for the size of the task used here.
  small.task = multiclass.small.task
  max.feats = ceiling(getTaskSize(small.task)/2)

  filtered.lrn = makeFilterWrapper(makeLearner("classif.rpart"), fw.method = "kruskal.test")
  search.space = makeParamSet(
    makeIntegerParam("fw.abs", lower = 1, upper = expression(ceiling(n/2)))
  )
  random.search = makeTuneControlRandom(maxit = 5)

  tune.result = tuneParams(filtered.lrn, task = small.task, resampling = hout,
    par.set = search.space, control = random.search)
  opt.path = as.data.frame(tune.result$opt.path)
  expect_integer(opt.path$fw.abs, lower = 1, upper = max.feats, any.missing = FALSE)
})

0 comments on commit 4f503e8

Please sign in to comment.