Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expression handling #1126

Merged
merged 22 commits into from
Mar 2, 2017
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Encoding: UTF-8
Roxygen: list(wrap = FALSE)
Depends:
R (>= 3.0.2),
ParamHelpers (>= 1.8),
ParamHelpers (>= 1.9)
Imports:
BBmisc (>= 1.10),
ggplot2,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ S3method(downsample,Task)
S3method(estimateRelativeOverfitting,ResampleDesc)
S3method(estimateResidualVariance,Learner)
S3method(estimateResidualVariance,WrappedModel)
S3method(evaluateParamExpressions,Learner)
S3method(generateCalibrationData,BenchmarkResult)
S3method(generateCalibrationData,Prediction)
S3method(generateCalibrationData,ResampleResult)
Expand Down Expand Up @@ -86,6 +87,7 @@ S3method(getTaskTargetNames,TaskDescUnsupervised)
S3method(getTaskTargets,CostSensTask)
S3method(getTaskTargets,SupervisedTask)
S3method(getTaskTargets,UnsupervisedTask)
S3method(hasExpression,Learner)
S3method(impute,Task)
S3method(impute,data.frame)
S3method(isFailureModel,BaseWrapperModel)
Expand Down Expand Up @@ -825,6 +827,7 @@ export(getTaskClassLevels)
export(getTaskCosts)
export(getTaskData)
export(getTaskDescription)
export(getTaskDictionary)
export(getTaskFeatureNames)
export(getTaskFormula)
export(getTaskId)
Expand Down
7 changes: 7 additions & 0 deletions R/Learner_properties.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,10 @@ getSupportedLearnerProperties = function(type = NA_character_) {
p[[type]]
}

#' @param obj [\code{\link{Learner}} | \code{character(1)}]\cr
#' Same as \code{learner} above.
#' @rdname LearnerProperties
#' @export
hasExpression.Learner = function(obj) {
  # A learner carries unevaluated expressions when either its parameter set
  # or any of its currently set hyperparameter values contains one.
  in.par.set = any(hasExpression(obj$par.set))
  in.par.vals = any(vlapply(obj$par.vals, is.expression))
  in.par.set || in.par.vals
}
6 changes: 3 additions & 3 deletions R/RLearner_classif_randomForest.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ makeRLearner.classif.randomForest = function() {
package = "randomForest",
par.set = makeParamSet(
makeIntegerLearnerParam(id = "ntree", default = 500L, lower = 1L),
makeIntegerLearnerParam(id = "mtry", lower = 1L),
makeIntegerLearnerParam(id = "mtry", lower = 1L, default = expression(floor(sqrt(p)))),
makeLogicalLearnerParam(id = "replace", default = TRUE),
makeNumericVectorLearnerParam(id = "classwt", lower = 0),
makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1),
makeNumericVectorLearnerParam(id = "classwt", lower = 0, len = expression(k)),
makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1, len = expression(k)),
makeUntypedLearnerParam(id = "strata", tunable = FALSE),
makeIntegerVectorLearnerParam(id = "sampsize", lower = 1L),
makeIntegerLearnerParam(id = "nodesize", default = 1L, lower = 1L),
Expand Down
32 changes: 30 additions & 2 deletions R/Task_operators.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ
checkTask(task, "Task")

if (missing(subset)) {
subset = NULL
subset = NULL
} else {
assert(checkIntegerish(subset), checkLogical(subset))
if (is.logical(subset))
Expand All @@ -266,7 +266,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ
assertLogical(target.extra)

task.features = getTaskFeatureNames(task)

# if supplied check if the input is right and always convert 'features'
# to character vec
if (!missing(features)) {
Expand Down Expand Up @@ -453,3 +453,31 @@ getTaskFactorLevels = function(task) {
getTaskWeights = function(task) {
task$weights
}


#' @title Create a dictionary based on the task.
#'
#' @description Returns a dictionary, which contains the \link{Task} itself
#' (\code{task}), the number of features (\code{p}), the number of
#' observations (\code{n}), the task type (\code{type}) and in case of
#' classification tasks, the number of class levels (\code{k}).
#'
#' @template arg_task
#' @return [\code{\link[base]{list}}]. Used for evaluating the expressions
#' within a parameter, parameter set or list of parameters.
#' @family task
#' @export
#' @examples
#' task = makeClassifTask(data = iris, target = "Species")
#' getTaskDictionary(task)
getTaskDictionary = function(task) {
  # Entries available for every task type: the task object itself,
  # number of features (p), number of observations (n) and the task type.
  task.type = getTaskType(task)
  dict = list(
    task = task,
    p = getTaskNFeats(task),
    n = getTaskSize(task),
    type = task.type
  )
  # Classification tasks additionally expose the number of classes (k).
  if (task.type == "classif") {
    dict$k = length(getTaskClassLevels(task))
  }
  dict
}
77 changes: 77 additions & 0 deletions R/evaluateParamExpressions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#' @title Evaluates expressions within a learner or parameter set.
#'
#' @description
#' A \code{\link{Learner}} can contain unevaluated \code{\link[base]{expression}s}
#' as value for a hyperparameter. E.g., these expressions are used if the default
#' value depends on the task size or an upper limit for a parameter is given by
#' the number of features in a task. \code{evaluateParamExpressions} allows to
#' evaluate these expressions using a given dictionary, which holds the following
#' information:
#' \itemize{
#' \item{\code{task}:} the task itself, allowing to access any of its elements.
#' \item{\code{p}:} the number of features in the task
#' \item{\code{n}:} the number of observations in the task
#'   \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costsens" or "multilabel"
#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
#' }
#' Usually the evaluation of the expression is performed automatically, e.g. in
#' \code{\link{train}} or \code{\link{tuneParams}}. Therefore calling
#' \code{evaluateParamExpressions} manually should not be necessary.
#' It is also possible to directly evaluate the expressions of a
#' \code{\link[ParamHelpers]{ParamSet}}, \code{\link[base]{list}} of
#' \code{\link[ParamHelpers]{Param}s} or single \code{\link[ParamHelpers]{Param}s}.
#' For further information on these, please refer to the documentation of the
#' \code{ParamHelpers} package.
#'
#' @param obj [\code{\link{Learner}}]\cr
#' The learner. If you pass a string the learner will be created via
#' \code{\link{makeLearner}}. Expressions within \code{length}, \code{lower}
#' or \code{upper} boundaries, \code{default} or \code{value} will be
#' evaluated using the provided dictionary (\code{dict}).
#' @param dict [\code{environment} | \code{list} | \code{NULL}]\cr
#' Environment or list which will be used for evaluating the variables
#' of expressions within a parameter, parameter set or list of parameters.
#' The default is \code{NULL}.
#' @return [\code{\link{Learner}}].
#' @export
#' @examples
#' ## (1) evaluation of a learner's hyperparameters
#' task = makeClassifTask(data = iris, target = "Species")
#' dict = getTaskDictionary(task = task)
#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p),
#' minbucket = expression(3L + 4L * task$task.desc$has.blocking))
#' lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict)
#'
#' getHyperPars(lrn1)
#' getHyperPars(lrn2)
#'
#' ## (2) evaluation of a learner's entire parameter set
#' task = makeClassifTask(data = iris, target = "Species")
#' dict = getTaskDictionary(task = task)
#' lrn1 = makeLearner("classif.randomForest")
#' lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict)
#'
#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff'
#' lrn1$par.set
#' lrn2$par.set
#'
#' ## (3) evaluation of a parameter set
#' task = makeClassifTask(data = iris, target = "Species")
#' dict = getTaskDictionary(task = task)
#' ps1 = makeParamSet(
#' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x),
#' makeDiscreteParam("sigma", values = expression(list(k, p)))
#' )
#' ps2 = evaluateParamExpressions(obj = ps1, dict = dict)
#'
#' ps1
#' ps2
evaluateParamExpressions.Learner = function(obj, dict = NULL) {
  # Accepts a Learner object or a learner id string (resolved via checkLearner).
  lrn = checkLearner(obj)
  # Nothing to do when neither par.set nor par.vals contain expressions.
  if (!hasExpression(lrn)) {
    return(lrn)
  }
  assertList(dict, null.ok = TRUE)
  # Evaluate expressions in the parameter set (bounds, lengths, defaults)
  # and in the currently set hyperparameter values, using the dictionary.
  lrn$par.set = evaluateParamExpressions(obj = lrn$par.set, dict = dict)
  lrn$par.vals = evaluateParamExpressions(obj = lrn$par.vals, dict = dict)
  lrn
}
2 changes: 2 additions & 0 deletions R/makeLearner.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@
#' @return [\code{\link{Learner}}].
#' @family learner
#' @export
#' @note Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information.
#' @aliases Learner
#' @examples
#' makeLearner("classif.rpart")
#' makeLearner("classif.lda", predict.type = "prob")
#' makeLearner("classif.rpart", minsplit = expression(k))
#' lrn = makeLearner("classif.lda", method = "t", nu = 10)
#' print(lrn$par.vals)
makeLearner = function(cl, id = cl, predict.type = "response", predict.threshold = NULL,
Expand Down
6 changes: 4 additions & 2 deletions R/setHyperPars.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
#' @note If a named (hyper)parameter can't be found for the given learner, the 3
#' closest (hyper)parameter names will be output in case the user mistyped.
#' @export
#' @note Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information.
#' @family learner
#' @importFrom utils adist
#' @examples
#' cl1 = makeLearner("classif.ksvm", sigma = 1)
#' cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2))
#' cl3 = setHyperPars(cl2, C = expression(round(n / p)))
#' print(cl1)
#' # note the now set and altered hyperparameters:
#' print(cl2)
#' print(cl3)
setHyperPars = function(learner, ..., par.vals = list()) {
args = list(...)
assertClass(learner, classes = "Learner")
Expand Down Expand Up @@ -74,7 +76,7 @@ setHyperPars2.Learner = function(learner, par.vals) {
learner$par.set$pars[[n]] = makeUntypedLearnerParam(id = n)
learner$par.vals[[n]] = p
} else {
if (on.par.out.of.bounds != "quiet" && !isFeasible(pd, p)) {
if (on.par.out.of.bounds != "quiet" && !isFeasible(pd, p) && !is.expression(p)) {
msg = sprintf("%s is not feasible for parameter '%s'!", convertToShortString(p), pd$id)
if (on.par.out.of.bounds == "stop") {
stop(msg)
Expand Down
4 changes: 4 additions & 0 deletions R/train.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
train = function(learner, task, subset, weights = NULL) {
learner = checkLearner(learner)
assertClass(task, classes = "Task")
if (hasExpression(learner)) {
dict = getTaskDictionary(task = task)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dict should somehow contain information about subset. The learner only sees the subset task, and probably expects the parameters to behave accordingly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would assume that the task is already subsetted?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Subsetting happens in trainLearner (e.g.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course. I remember... what I don't remember is the motivation behind this though.

learner = evaluateParamExpressions(obj = learner, dict = dict)
}
if (missing(subset)) {
subset = seq_len(getTaskSize(task))
} else {
Expand Down
23 changes: 21 additions & 2 deletions R/tuneParams.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,18 @@
#' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr
#' Collection of parameters and their constraints for optimization.
#' Dependent parameters with a \code{requires} field must use \code{quote} and not
#' \code{expression} to define it.
#' \code{expression} to define it. On the other hand, task dependent parameters
#' need to be defined with expressions.
#' @param control [\code{\link{TuneControl}}]\cr
#' Control object for search method. Also selects the optimization algorithm for tuning.
#' @template arg_showinfo
#' @return [\code{\link{TuneResult}}].
#' @family tune
#' @note If you would like to include results from the training data set, make
#' sure to appropriately adjust the resampling strategy and the aggregation for
#' the measure. See example code below.
#' the measure. See example code below.\cr
#' Note that learners and parameter sets can contain task dependent
#' expressions, see \code{\link{evaluateParamExpressions}} for more information.
#' @export
#' @examples
#' # a grid search for an SVM (with a tiny number of points...)
Expand All @@ -50,6 +53,16 @@
#' print(head(generateHyperParsEffectData(res)))
#' print(head(generateHyperParsEffectData(res, trafo = TRUE)))
#'
#' # tuning the parameters 'C' and 'sigma' of an SVM, where the boundaries
#' # of 'sigma' depend on the number of features
#' ps = makeParamSet(
#' makeNumericLearnerParam("sigma", lower = expression(0.2 * p), upper = expression(2.5 * p)),
#' makeDiscreteLearnerParam("C", values = 2^c(-1, 1))
#' )
#' rdesc = makeResampleDesc("Subsample")
#' ctrl = makeTuneControlRandom(maxit = 2L)
#' res = tuneParams("classif.ksvm", iris.task, par.set = ps, control = ctrl, resampling = rdesc)
#'
#' \dontrun{
#' # we optimize the SVM over 3 kernels simultaneously
#' # note how we use dependent params (requires = ...) and iterated F-racing here
Expand Down Expand Up @@ -79,8 +92,14 @@
tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) {
learner = checkLearner(learner)
assertClass(task, classes = "Task")
dict = getTaskDictionary(task = task)
if (hasExpression(learner)) {
learner = evaluateParamExpressions(obj = learner, dict = dict)
}
measures = checkMeasures(measures, learner)
assertClass(par.set, classes = "ParamSet")
if (hasExpression(par.set))
par.set = evaluateParamExpressions(obj = par.set, dict = dict)
assertClass(control, classes = "TuneControl")
if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance"))
stop("Argument resampling must be of class ResampleDesc or ResampleInstance!")
Expand Down
6 changes: 6 additions & 0 deletions man/LearnerProperties.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

80 changes: 80 additions & 0 deletions man/evaluateParamExpressions.Learner.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading