diff --git a/NAMESPACE b/NAMESPACE index e07496d760..185775f411 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ S3method(downsample,Task) S3method(estimateRelativeOverfitting,ResampleDesc) S3method(estimateResidualVariance,Learner) S3method(estimateResidualVariance,WrappedModel) +S3method(evaluateParamExpressions,Learner) S3method(generateCalibrationData,BenchmarkResult) S3method(generateCalibrationData,Prediction) S3method(generateCalibrationData,ResampleResult) @@ -96,6 +97,7 @@ S3method(getTaskTargetNames,TaskDescUnsupervised) S3method(getTaskTargets,CostSensTask) S3method(getTaskTargets,SupervisedTask) S3method(getTaskTargets,UnsupervisedTask) +S3method(hasExpression,Learner) S3method(impute,Task) S3method(impute,data.frame) S3method(isFailureModel,BaseWrapperModel) @@ -866,6 +868,7 @@ export(getTaskClassLevels) export(getTaskCosts) export(getTaskData) export(getTaskDescription) +export(getTaskDictionary) export(getTaskFeatureNames) export(getTaskFormula) export(getTaskId) diff --git a/R/Learner_properties.R b/R/Learner_properties.R index f1ab237c32..7e9ae457df 100644 --- a/R/Learner_properties.R +++ b/R/Learner_properties.R @@ -78,3 +78,11 @@ listLearnerProperties = function(type = "any") { assertSubset(type, allProps) mlr$learner.properties[[type]] } + +#' @param obj [\code{\link{Learner}} | \code{character(1)}]\cr +#' Same as \code{learner} above. 
+#' @rdname LearnerProperties +#' @export +hasExpression.Learner = function(obj) { + any(hasExpression(obj$par.set)) || any(vlapply(obj$par.vals, is.expression)) +} diff --git a/R/RLearner_classif_randomForest.R b/R/RLearner_classif_randomForest.R index 839c722d0b..5e14aa39fe 100644 --- a/R/RLearner_classif_randomForest.R +++ b/R/RLearner_classif_randomForest.R @@ -5,10 +5,10 @@ makeRLearner.classif.randomForest = function() { package = "randomForest", par.set = makeParamSet( makeIntegerLearnerParam(id = "ntree", default = 500L, lower = 1L), - makeIntegerLearnerParam(id = "mtry", lower = 1L), + makeIntegerLearnerParam(id = "mtry", lower = 1L, default = expression(floor(sqrt(p)))), makeLogicalLearnerParam(id = "replace", default = TRUE), - makeNumericVectorLearnerParam(id = "classwt", lower = 0), - makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1), + makeNumericVectorLearnerParam(id = "classwt", lower = 0, len = expression(k)), + makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1, len = expression(k)), makeUntypedLearnerParam(id = "strata", tunable = FALSE), makeIntegerVectorLearnerParam(id = "sampsize", lower = 1L), makeIntegerLearnerParam(id = "nodesize", default = 1L, lower = 1L), diff --git a/R/Task_operators.R b/R/Task_operators.R index 39eeae2b08..5f72a6d7fe 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -454,3 +454,32 @@ getTaskFactorLevels = function(task) { getTaskWeights = function(task) { task$weights } + + +#' @title Create a dictionary based on the task. +#' +#' @description Returns a dictionary, which contains the \link{Task} itself +#' (\code{task}), the number of features (\code{p}), the number of +#' observations (\code{n}), the task type (\code{type}) and in case of +#' classification tasks, the number of class levels (\code{k}). +#' +#' @template arg_task +#' @return [\code{\link[base]{list}}]. Used for evaluating the expressions +#' within a parameter, parameter set or list of parameters. 
+#' @family task +#' @export +#' @examples +#' task = makeClassifTask(data = iris, target = "Species") +#' getTaskDictionary(task) +getTaskDictionary = function(task) { + assertClass(task, classes = "Task") + dict = list( + task = task, + p = getTaskNFeats(task), + n = getTaskSize(task), + type = getTaskType(task) + ) + if (dict$type == "classif") + dict$k = length(getTaskClassLevels(task)) + return(dict) +} diff --git a/R/evaluateParamExpressions.R b/R/evaluateParamExpressions.R new file mode 100644 index 0000000000..d02ffcdeed --- /dev/null +++ b/R/evaluateParamExpressions.R @@ -0,0 +1,77 @@ +#' @title Evaluates expressions within a learner or parameter set. +#' +#' @description +#' A \code{\link{Learner}} can contain unevaluated \code{\link[base]{expression}s} +#' as value for a hyperparameter. E.g., these expressions are used if the default +#' value depends on the task size or an upper limit for a parameter is given by +#' the number of features in a task. \code{evaluateParamExpressions} allows to +#' evaluate these expressions using a given dictionary, which holds the following +#' information: +#' \itemize{ +#' \item{\code{task}:} the task itself, allowing to access any of its elements. +#' \item{\code{p}:} the number of features in the task +#' \item{\code{n}:} the number of observations in the task +#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costsens" or "multilabel" +#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +#' } +#' Usually the evaluation of the expression is performed automatically, e.g. in +#' \code{\link{train}} or \code{\link{tuneParams}}. Therefore calling +#' \code{evaluateParamExpressions} manually should not be necessary. +#' It is also possible to directly evaluate the expressions of a +#' \code{\link[ParamHelpers]{ParamSet}}, \code{\link[base]{list}} of +#' \code{\link[ParamHelpers]{Param}s} or single \code{\link[ParamHelpers]{Param}s}. 
+#' For further information on these, please refer to the documentation of the +#' \code{ParamHelpers} package. +#' +#' @param obj [\code{\link{Learner}}]\cr +#' The learner. If you pass a string the learner will be created via +#' \code{\link{makeLearner}}. Expressions within \code{length}, \code{lower} +#' or \code{upper} boundaries, \code{default} or \code{value} will be +#' evaluated using the provided dictionary (\code{dict}). +#' @param dict [\code{environment} | \code{list} | \code{NULL}]\cr +#' Environment or list which will be used for evaluating the variables +#' of expressions within a parameter, parameter set or list of parameters. +#' The default is \code{NULL}. +#' @return [\code{\link{Learner}}]. +#' @export +#' @examples +#' ## (1) evaluation of a learner's hyperparameters +#' task = makeClassifTask(data = iris, target = "Species") +#' dict = getTaskDictionary(task = task) +#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), +#' minbucket = expression(3L + 4L * task$task.desc$has.blocking)) +#' lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) +#' +#' getHyperPars(lrn1) +#' getHyperPars(lrn2) +#' +#' ## (2) evaluation of a learner's entire parameter set +#' task = makeClassifTask(data = iris, target = "Species") +#' dict = getTaskDictionary(task = task) +#' lrn1 = makeLearner("classif.randomForest") +#' lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) +#' +#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff' +#' lrn1$par.set +#' lrn2$par.set +#' +#' ## (3) evaluation of a parameter set +#' task = makeClassifTask(data = iris, target = "Species") +#' dict = getTaskDictionary(task = task) +#' ps1 = makeParamSet( +#' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), +#' makeDiscreteParam("sigma", values = expression(list(k, p))) +#' ) +#' ps2 = evaluateParamExpressions(obj = ps1, dict = dict) +#' +#' ps1 +#' ps2 +evaluateParamExpressions.Learner = function(obj, 
dict = NULL) { + obj = checkLearner(obj) + if (hasExpression(obj)) { + assertList(dict, null.ok = TRUE) + obj$par.set = evaluateParamExpressions(obj = obj$par.set, dict = dict) + obj$par.vals = evaluateParamExpressions(obj = obj$par.vals, dict = dict) + } + return(obj) +} diff --git a/R/makeLearner.R b/R/makeLearner.R index 3dd5fabf7c..97c5c15c2a 100644 --- a/R/makeLearner.R +++ b/R/makeLearner.R @@ -43,10 +43,12 @@ #' @return [\code{\link{Learner}}]. #' @family learner #' @export +#' @note Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. #' @aliases Learner #' @examples #' makeLearner("classif.rpart") #' makeLearner("classif.lda", predict.type = "prob") +#' makeLearner("classif.rpart", minsplit = expression(k)) #' lrn = makeLearner("classif.lda", method = "t", nu = 10) #' print(lrn$par.vals) makeLearner = function(cl, id = cl, predict.type = "response", predict.threshold = NULL, diff --git a/R/setHyperPars.R b/R/setHyperPars.R index d8be3e2ff0..08eaeadc12 100644 --- a/R/setHyperPars.R +++ b/R/setHyperPars.R @@ -11,14 +11,16 @@ #' @note If a named (hyper)parameter can't be found for the given learner, the 3 #' closest (hyper)parameter names will be output in case the user mistyped. #' @export +#' @note Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. #' @family learner #' @importFrom utils adist #' @examples #' cl1 = makeLearner("classif.ksvm", sigma = 1) #' cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2)) +#' cl3 = setHyperPars(cl2, C = expression(round(n / p))) #' print(cl1) -#' # note the now set and altered hyperparameters: #' print(cl2) +#' print(cl3) setHyperPars = function(learner, ..., par.vals = list()) { args = list(...) 
assertClass(learner, classes = "Learner") @@ -73,7 +75,7 @@ setHyperPars2.Learner = function(learner, par.vals) { learner$par.set$pars[[n]] = makeUntypedLearnerParam(id = n) learner$par.vals[[n]] = p } else { - if (on.par.out.of.bounds != "quiet" && !isFeasible(pd, p)) { + if (on.par.out.of.bounds != "quiet" && !isFeasible(pd, p) && !is.expression(p)) { msg = sprintf("%s is not feasible for parameter '%s'!", convertToShortString(p), pd$id) if (on.par.out.of.bounds == "stop") { stop(msg) diff --git a/R/train.R b/R/train.R index e2b6f4c522..458adfdcd7 100644 --- a/R/train.R +++ b/R/train.R @@ -31,6 +31,10 @@ train = function(learner, task, subset, weights = NULL) { learner = checkLearner(learner) assertClass(task, classes = "Task") + if (hasExpression(learner)) { + dict = getTaskDictionary(task = task) + learner = evaluateParamExpressions(obj = learner, dict = dict) + } if (missing(subset)) { subset = seq_len(getTaskSize(task)) } else { diff --git a/R/tuneParams.R b/R/tuneParams.R index 1b859af82b..304993db54 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -22,7 +22,8 @@ #' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr #' Collection of parameters and their constraints for optimization. #' Dependent parameters with a \code{requires} field must use \code{quote} and not -#' \code{expression} to define it. +#' \code{expression} to define it. On the other hand, task dependent parameters +#' need to be defined with expressions. #' @param control [\code{\link{TuneControl}}]\cr #' Control object for search method. Also selects the optimization algorithm for tuning. #' @template arg_showinfo @@ -31,6 +32,8 @@ #' @note If you would like to include results from the training data set, make #' sure to appropriately adjust the resampling strategy and the aggregation for #' the measure. See example code below. +#' Also note that learners and parameter sets can contain task dependent +#' expressions, see \code{\link{evaluateParamExpressions}} for more information. 
#' @export #' @examples #' # a grid search for an SVM (with a tiny number of points...) @@ -50,6 +53,16 @@ #' print(head(generateHyperParsEffectData(res))) #' print(head(generateHyperParsEffectData(res, trafo = TRUE))) #' +#' # tuning the parameters 'C' and 'sigma' of an SVM, where the boundaries +#' # of 'sigma' depend on the number of features +#' ps = makeParamSet( +#' makeNumericLearnerParam("sigma", lower = expression(0.2 * p), upper = expression(2.5 * p)), +#' makeDiscreteLearnerParam("C", values = 2^c(-1, 1)) +#' ) +#' rdesc = makeResampleDesc("Subsample") +#' ctrl = makeTuneControlRandom(maxit = 2L) +#' res = tuneParams("classif.ksvm", iris.task, par.set = ps, control = ctrl, resampling = rdesc) +#' #' \dontrun{ #' # we optimize the SVM over 3 kernels simultanously #' # note how we use dependent params (requires = ...) and iterated F-racing here @@ -81,6 +94,11 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, sho assertClass(task, classes = "Task") measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") + if (hasExpression(learner) || hasExpression(par.set)) { + dict = getTaskDictionary(task = task) + learner = evaluateParamExpressions(obj = learner, dict = dict) + par.set = evaluateParamExpressions(obj = par.set, dict = dict) + } assertClass(control, classes = "TuneControl") if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") @@ -113,5 +131,3 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, sho messagef("[Tune] Result: %s : %s", paramValueToString(par.set, or$x), perfsToString(or$y)) return(or) } - - diff --git a/man/LearnerProperties.Rd b/man/LearnerProperties.Rd index b04d54c5ea..08b3a54f5f 100644 --- a/man/LearnerProperties.Rd +++ b/man/LearnerProperties.Rd @@ -6,11 +6,14 @@ \alias{hasLearnerProperties} \alias{getLearnerProperties} 
\alias{hasLearnerProperties} +\alias{hasExpression.Learner} \title{Query properties of learners.} \usage{ getLearnerProperties(learner) hasLearnerProperties(learner, props) + +\method{hasExpression}{Learner}(obj) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -19,6 +22,9 @@ If you pass a string the learner will be created via \code{\link{makeLearner}}.} \item{props}{[\code{character}]\cr Vector of properties to query.} + +\item{obj}{[\code{\link{Learner}} | \code{character(1)}]\cr +Same as \code{learner} above.} } \value{ \code{getLearnerProperties} returns a character vector with learner properties. diff --git a/man/evaluateParamExpressions.Learner.Rd b/man/evaluateParamExpressions.Learner.Rd new file mode 100644 index 0000000000..887f17c351 --- /dev/null +++ b/man/evaluateParamExpressions.Learner.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/evaluateParamExpressions.R +\name{evaluateParamExpressions.Learner} +\alias{evaluateParamExpressions.Learner} +\title{Evaluates expressions within a learner or parameter set.} +\usage{ +\method{evaluateParamExpressions}{Learner}(obj, dict = NULL) +} +\arguments{ +\item{obj}{[\code{\link{Learner}}]\cr +The learner. If you pass a string the learner will be created via +\code{\link{makeLearner}}. Expressions within \code{length}, \code{lower} +or \code{upper} boundaries, \code{default} or \code{value} will be +evaluated using the provided dictionary (\code{dict}).} + +\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr +Environment or list which will be used for evaluating the variables +of expressions within a parameter, parameter set or list of parameters. +The default is \code{NULL}.} +} +\value{ +[\code{\link{Learner}}]. +} +\description{ +A \code{\link{Learner}} can contain unevaluated \code{\link[base]{expression}s} +as value for a hyperparameter. 
E.g., these expressions are used if the default +value depends on the task size or an upper limit for a parameter is given by +the number of features in a task. \code{evaluateParamExpressions} allows to +evaluate these expressions using a given dictionary, which holds the following +information: +\itemize{ + \item{\code{task}:} the task itself, allowing to access any of its elements. + \item{\code{p}:} the number of features in the task + \item{\code{n}:} the number of observations in the task + \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costsens" or "multilabel" + \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +} +Usually the evaluation of the expression is performed automatically, e.g. in +\code{\link{train}} or \code{\link{tuneParams}}. Therefore calling +\code{evaluateParamExpressions} manually should not be necessary. +It is also possible to directly evaluate the expressions of a +\code{\link[ParamHelpers]{ParamSet}}, \code{\link[base]{list}} of +\code{\link[ParamHelpers]{Param}s} or single \code{\link[ParamHelpers]{Param}s}. +For further information on these, please refer to the documentation of the +\code{ParamHelpers} package. 
+} +\examples{ +## (1) evaluation of a learner's hyperparameters +task = makeClassifTask(data = iris, target = "Species") +dict = getTaskDictionary(task = task) +lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), + minbucket = expression(3L + 4L * task$task.desc$has.blocking)) +lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) + +getHyperPars(lrn1) +getHyperPars(lrn2) + +## (2) evaluation of a learner's entire parameter set +task = makeClassifTask(data = iris, target = "Species") +dict = getTaskDictionary(task = task) +lrn1 = makeLearner("classif.randomForest") +lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) + +## Note the values for parameters 'mtry', 'classwt' and 'cutoff' +lrn1$par.set +lrn2$par.set + +## (3) evaluation of a parameter set +task = makeClassifTask(data = iris, target = "Species") +dict = getTaskDictionary(task = task) +ps1 = makeParamSet( + makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = expression(list(k, p))) +) +ps2 = evaluateParamExpressions(obj = ps1, dict = dict) + +ps1 +ps2 +} diff --git a/man/getTaskClassLevels.Rd b/man/getTaskClassLevels.Rd index 597244357c..66f734d541 100644 --- a/man/getTaskClassLevels.Rd +++ b/man/getTaskClassLevels.Rd @@ -21,6 +21,7 @@ actually return the same thing. Other task: \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskCosts.Rd b/man/getTaskCosts.Rd index 0453efd608..d50d6bc582 100644 --- a/man/getTaskCosts.Rd +++ b/man/getTaskCosts.Rd @@ -24,6 +24,7 @@ Retuns \dQuote{NULL} if the task is not of type \dQuote{costsens}. 
Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskData.Rd b/man/getTaskData.Rd index 8bdb3e0fb0..28fd59ac4a 100644 --- a/man/getTaskData.Rd +++ b/man/getTaskData.Rd @@ -63,6 +63,7 @@ head(getTaskData(task, subset = 1:100, recode.target = "01")) Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskDescription.Rd b/man/getTaskDescription.Rd index 067ffc1861..f4c51c8eb5 100644 --- a/man/getTaskDescription.Rd +++ b/man/getTaskDescription.Rd @@ -19,6 +19,7 @@ Get a summarizing task description. \seealso{ Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskDictionary.Rd b/man/getTaskDictionary.Rd new file mode 100644 index 0000000000..e19145405b --- /dev/null +++ b/man/getTaskDictionary.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Task_operators.R +\name{getTaskDictionary} +\alias{getTaskDictionary} +\title{Create a dictionary based on the task.} +\usage{ +getTaskDictionary(task) +} +\arguments{ +\item{task}{[\code{\link{Task}}]\cr +The task.} +} +\value{ +[\code{\link[base]{list}}]. Used for evaluating the expressions +within a parameter, parameter set or list of parameters. 
+} +\description{ +Returns a dictionary, which contains the \link{Task} itself +(\code{task}), the number of features (\code{p}), the number of +observations (\code{n}), the task type (\code{type}) and in case of +classification tasks, the number of class levels (\code{k}). +} +\examples{ +task = makeClassifTask(data = iris, target = "Species") +getTaskDictionary(task) +} +\seealso{ +Other task: \code{\link{getTaskClassLevels}}, + \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, + \code{\link{getTaskDescription}}, + \code{\link{getTaskFeatureNames}}, + \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, + \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, + \code{\link{getTaskTargetNames}}, + \code{\link{getTaskTargets}}, \code{\link{getTaskType}}, + \code{\link{subsetTask}} +} diff --git a/man/getTaskFeatureNames.Rd b/man/getTaskFeatureNames.Rd index b4be62e398..9e7cc0a08b 100644 --- a/man/getTaskFeatureNames.Rd +++ b/man/getTaskFeatureNames.Rd @@ -20,6 +20,7 @@ Target column name is not included. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, \code{\link{getTaskTargetNames}}, diff --git a/man/getTaskFormula.Rd b/man/getTaskFormula.Rd index 7a34993bbb..0dcb19f8e1 100644 --- a/man/getTaskFormula.Rd +++ b/man/getTaskFormula.Rd @@ -34,6 +34,7 @@ For multilabel it is \dQuote{ + ... + ~ .}. 
Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskId.Rd b/man/getTaskId.Rd index 905282d411..e5ca9859c3 100644 --- a/man/getTaskId.Rd +++ b/man/getTaskId.Rd @@ -20,6 +20,7 @@ Get the id of the task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskNFeats.Rd b/man/getTaskNFeats.Rd index 97c585f3d5..da95a064b4 100644 --- a/man/getTaskNFeats.Rd +++ b/man/getTaskNFeats.Rd @@ -20,6 +20,7 @@ Get number of features in task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskSize.Rd b/man/getTaskSize.Rd index 525002d67e..67a4c83bda 100644 --- a/man/getTaskSize.Rd +++ b/man/getTaskSize.Rd @@ -20,6 +20,7 @@ Get number of observations in task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, diff --git a/man/getTaskTargetNames.Rd b/man/getTaskTargetNames.Rd index 66941fff78..2ac09a6252 100644 --- a/man/getTaskTargetNames.Rd +++ b/man/getTaskTargetNames.Rd @@ -21,6 +21,7 @@ actually return the same thing. 
Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskTargets.Rd b/man/getTaskTargets.Rd index 081c906fa0..5759145131 100644 --- a/man/getTaskTargets.Rd +++ b/man/getTaskTargets.Rd @@ -32,6 +32,7 @@ getTaskTargets(task) Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskType.Rd b/man/getTaskType.Rd index 49396fd02b..1d081b770f 100644 --- a/man/getTaskType.Rd +++ b/man/getTaskType.Rd @@ -20,6 +20,7 @@ Get the type of the task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/makeLearner.Rd b/man/makeLearner.Rd index bf2c74c5b6..01f0da011e 100644 --- a/man/makeLearner.Rd +++ b/man/makeLearner.Rd @@ -72,9 +72,13 @@ value selects the label. The threshold used to assign the label can later be cha To see all possible properties of a learner, go to: \code{\link{LearnerProperties}}. } +\note{ +Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. 
+} \examples{ makeLearner("classif.rpart") makeLearner("classif.lda", predict.type = "prob") +makeLearner("classif.rpart", minsplit = expression(k)) lrn = makeLearner("classif.lda", method = "t", nu = 10) print(lrn$par.vals) } diff --git a/man/makeTuneWrapper.Rd b/man/makeTuneWrapper.Rd index 97f14a864a..e70eddf9c4 100644 --- a/man/makeTuneWrapper.Rd +++ b/man/makeTuneWrapper.Rd @@ -26,7 +26,8 @@ Default is the default measure for the task, see here \code{\link{getDefaultMeas \item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr Collection of parameters and their constraints for optimization. Dependent parameters with a \code{requires} field must use \code{quote} and not -\code{expression} to define it.} +\code{expression} to define it. On the other hand, task dependent parameters +need to be defined with expressions.} \item{control}{[\code{\link{TuneControl}}]\cr Control object for search method. Also selects the optimization algorithm for tuning.} diff --git a/man/setHyperPars.Rd b/man/setHyperPars.Rd index 125c4df5af..9c997fa7b9 100644 --- a/man/setHyperPars.Rd +++ b/man/setHyperPars.Rd @@ -28,13 +28,16 @@ Set the hyperparameters of a learner object. \note{ If a named (hyper)parameter can't be found for the given learner, the 3 closest (hyper)parameter names will be output in case the user mistyped. + +Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. 
} \examples{ cl1 = makeLearner("classif.ksvm", sigma = 1) cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2)) +cl3 = setHyperPars(cl2, C = expression(round(n / p))) print(cl1) -# note the now set and altered hyperparameters: print(cl2) +print(cl3) } \seealso{ Other learner: \code{\link{LearnerProperties}}, diff --git a/man/subsetTask.Rd b/man/subsetTask.Rd index 91a46e81aa..fd04e95eff 100644 --- a/man/subsetTask.Rd +++ b/man/subsetTask.Rd @@ -37,6 +37,7 @@ subsetTask(task, subset = 1:100) Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/tuneParams.Rd b/man/tuneParams.Rd index 95c9ec7024..74ce8f1c7b 100644 --- a/man/tuneParams.Rd +++ b/man/tuneParams.Rd @@ -29,7 +29,8 @@ Default is the default measure for the task, see here \code{\link{getDefaultMeas \item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr Collection of parameters and their constraints for optimization. Dependent parameters with a \code{requires} field must use \code{quote} and not -\code{expression} to define it.} +\code{expression} to define it. On the other hand, task dependent parameters +need to be defined with expressions.} \item{control}{[\code{\link{TuneControl}}]\cr Control object for search method. Also selects the optimization algorithm for tuning.} @@ -53,7 +54,9 @@ Multi-criteria tuning can be done with \code{\link{tuneParamsMultiCrit}}. \note{ If you would like to include results from the training data set, make sure to appropriately adjust the resampling strategy and the aggregation for -the measure. See example code below. +the measure. 
See example code below.\cr +Note that learners and parameter sets can contain task dependent +expressions, see \code{\link{evaluateParamExpressions}} for more information. } \examples{ # a grid search for an SVM (with a tiny number of points...) @@ -73,6 +76,16 @@ print(head(as.data.frame(res$opt.path, trafo = TRUE))) print(head(generateHyperParsEffectData(res))) print(head(generateHyperParsEffectData(res, trafo = TRUE))) +# tuning the parameters 'C' and 'sigma' of an SVM, where the boundaries +# of 'sigma' depend on the number of features +ps = makeParamSet( + makeNumericLearnerParam("sigma", lower = expression(0.2 * p), upper = expression(2.5 * p)), + makeDiscreteLearnerParam("C", values = 2^c(-1, 1)) +) +rdesc = makeResampleDesc("Subsample") +ctrl = makeTuneControlRandom(maxit = 2L) +res = tuneParams("classif.ksvm", iris.task, par.set = ps, control = ctrl, resampling = rdesc) + \dontrun{ # we optimize the SVM over 3 kernels simultanously # note how we use dependent params (requires = ...) 
and iterated F-racing here diff --git a/tests/testthat/helper_mock_learners.R b/tests/testthat/helper_mock_learners.R index 0d952aa0b3..ddadeb7bfa 100644 --- a/tests/testthat/helper_mock_learners.R +++ b/tests/testthat/helper_mock_learners.R @@ -117,4 +117,23 @@ registerS3method("predictLearner", "regr.__mlrmocklearners__6", predictLearner.r +# contains expressions in the parameter set and in the hyper params +makeRLearner.classif.__mlrmocklearners__7 = function() { + makeRLearnerClassif( + cl = "classif.__mlrmocklearners__7", + package = character(0L), + par.set = makeParamSet( + makeIntegerLearnerParam(id = "minsplit", default = 20L, lower = 1L), + makeIntegerLearnerParam(id = "mtry", lower = 1L, default = expression(floor(sqrt(p)))), + makeLogicalLearnerParam(id = "importance", default = expression(task$task.desc$has.blocking)), + makeNumericVectorLearnerParam(id = "classwt", lower = 0, len = expression(k)), + keys = c("task.desc", "has.blocking") + ), + par.vals = list(minsplit = expression(ceiling(0.1 * (n + p)))), + properties = c("twoclass", "multiclass", "numerics", "factors", "ordered", "prob"), + name = "Mock Learner 7", + short.name = "mock7" + ) +} +registerS3method("makeRLearner", "classif.__mlrmocklearners__7", makeRLearner.classif.__mlrmocklearners__7) diff --git a/tests/testthat/test_base_evaluateParamExpressions.R b/tests/testthat/test_base_evaluateParamExpressions.R new file mode 100644 index 0000000000..665306e2c3 --- /dev/null +++ b/tests/testthat/test_base_evaluateParamExpressions.R @@ -0,0 +1,72 @@ +context("evaluate param expressions") + +test_that("expressions in learners", { + ## expressions within 'pre-defined' learners + ## (1) expressions within default of parameter sets + lrn1 = makeLearner("classif.__mlrmocklearners__7") + dict = getTaskDictionary(task = binaryclass.task) + lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) + x1 = lrn1$par.set$pars$mtry$default + x2 = lrn2$par.set$pars$mtry$default + 
expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, floor(sqrt(ncol(binaryclass.df)))) + + ## (2) expressions within length of parameter sets + x1 = lrn1$par.set$pars$classwt$len + x2 = lrn2$par.set$pars$classwt$len + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, 2L) + + ## (3) expressions that go deeper into the task + x1 = lrn1$par.set$pars$importance$default + x2 = lrn2$par.set$pars$importance$default + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, binaryclass.task$task.desc$has.blocking) + + ## (4) expressions within hyperparameters + x1 = lrn1$par.vals$minsplit + x2 = lrn2$par.vals$minsplit + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, ceiling(0.1 * sum(dim(binaryclass.df)))) + + ## manually constructed expressions within hyperparams + lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) + dict = getTaskDictionary(task = binaryclass.task) + lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) + x1 = lrn1$par.vals$minsplit + x2 = lrn2$par.vals$minsplit + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(lrn2$par.vals$minsplit, 2 * getTaskNFeats(binaryclass.task)) +}) + +test_that("expressions in parameter sets", { + ps1 = makeParamSet( + makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = expression(list(p, k))) + ) + dict = getTaskDictionary(task = binaryclass.task) + ps2 = evaluateParamExpressions(obj = ps1, dict = dict) + + ## expressions within parameter sets + expect_equal(ps2$pars$C$lower, 2L) + expect_equal(ps2$pars$C$upper, 208L) + expect_equal(ps2$pars$sigma$values, list(60, 2)) +}) + +test_that("tuning works with expressions", { + task = multiclass.small.task + lrn = makeLearner("classif.rpart") + lrn = makeFilterWrapper(lrn, fw.method = "kruskal.test") + ps = makeParamSet( + 
makeIntegerParam("fw.abs", lower = 1, upper = expression(ceiling(n/2))) + ) + ctrl = makeTuneControlRandom(maxit = 5) + res = tuneParams(lrn, task = task, resampling = hout, par.set = ps, control = ctrl) + res = as.data.frame(res$opt.path) + expect_integer(res$fw.abs, lower = 1, upper = ceiling(getTaskSize(task)/2), any.missing = FALSE) +}) diff --git a/tests/testthat/test_base_measures.R b/tests/testthat/test_base_measures.R index 5577ea880e..2685d69246 100644 --- a/tests/testthat/test_base_measures.R +++ b/tests/testthat/test_base_measures.R @@ -262,10 +262,8 @@ test_that("check measure calculations", { expect_warning(measureRAE(c(1,1,1,1),c(1,2,3,4))) expect_silent(measureRAE(c(1,1,1,0),c(2,2,2,2))) # mape - suppressWarnings({ - expect_equal(NA, mape$fun(pred = pred.regr)) - expect_equal(NA, measureMAPE(c(5, 10, 0, 5),c(4, 11, 0, 4))) - }) + expect_equal(NA, suppressWarnings(mape$fun(pred = pred.regr))) + expect_equal(NA, suppressWarnings(measureMAPE(c(5, 10, 0, 5),c(4, 11, 0, 4)))) expect_warning(mape$fun(pred = pred.regr), regexp = "MAPE is undefined if any truth value is equal to 0.") expect_warning(measureMAPE(c(5, 10, 0, 5),c(4, 11, 0, 4)), regexp = "MAPE is undefined if any truth value is equal to 0.") pred.regr.mape = pred.regr