cleanup; added test

mlr-org · Aug 15, 2016 · e0b4728 · e0b4728
1 parent 00d92f8
commit e0b4728
Show file tree

Hide file tree

Showing 13 changed files with 85 additions and 105 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -33,10 +33,11 @@ Encoding: UTF-8
 Roxygen: list(wrap = FALSE)
 Depends:
     R (>= 3.0.2),
-    ParamHelpers (>= 1.8),
+    ParamHelpers (>= 1.9)
 Imports:
     BBmisc (>= 1.10),
     ggplot2,
+    ParamHelpers (>= 1.9),
     stats,
     stringi,
     checkmate (>= 1.8.1),

diff --git a/NAMESPACE b/NAMESPACE
@@ -86,6 +86,7 @@ S3method(getTaskTargetNames,TaskDescUnsupervised)
 S3method(getTaskTargets,CostSensTask)
 S3method(getTaskTargets,SupervisedTask)
 S3method(getTaskTargets,UnsupervisedTask)
+S3method(hasExpression,Learner)
 S3method(impute,Task)
 S3method(impute,data.frame)
 S3method(isFailureModel,BaseWrapperModel)

diff --git a/R/Learner_properties.R b/R/Learner_properties.R
@@ -76,3 +76,7 @@ getSupportedLearnerProperties = function(type = NA_character_) {
     p[[type]]
 }
 
+#' @export
+hasExpression.Learner = function(par) {
+  any(hasExpression(par$par.set)) || any(vlapply(par$par.vals, is.expression))
+}
diff --git a/R/Task_operators.R b/R/Task_operators.R
@@ -254,7 +254,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ
   checkTask(task, "Task")
 
   if (missing(subset)) {
-    subset = NULL 
+    subset = NULL
   } else {
     assert(checkIntegerish(subset), checkLogical(subset))
     if (is.logical(subset))
@@ -266,7 +266,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ
   assertLogical(target.extra)
 
   task.features = getTaskFeatureNames(task)
-  
+
   # if supplied check if the input is right and always convert 'features'
   # to character vec
   if (!missing(features)) {
@@ -459,13 +459,13 @@ getTaskWeights = function(task) {
 # features (p), the number of observations (n), the task type (type) and in
 # case of classification tasks the number of class levels (k)
 makeTaskDictionary = function(task) {
-  dict = list()
-  dict$task = task
-  dict$p = getTaskNFeats(task)
-  dict$n = getTaskSize(task)
-  dict$type = getTaskType(task)
+  dict = list(
+    task = task,
+    p = getTaskNFeats(task),
+    n = getTaskSize(task),
+    type = getTaskType(task)
+  )
   if (dict$type == "classif")
     dict$k = length(getTaskClassLevels(task))
-  # dict$keys = setdiff(c(names(task$task.desc), names(task), "data", names(task$env$data)), names(dict))
   return(dict)
 }
diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R
@@ -1,7 +1,22 @@
 #' @title Evaluates expressions within a learner or parameter set according to the task.
 #'
-#' @description Updates learners and/or parameter sets by evaluating their expressions
-#' based on a specific task. An overview of the possible expressions can be found in the details.
+#' @description
+#' A \code{\link{Learner}} or \code{\link[ParamHelpers]{ParamSet}} can contain an unevaluated \code{\link[base]{expression}}
+#' as value for a hyperparameter.
+#' E.g., these expressions are used if the default value depends on the task size or if an upper limit for a parameter
+#' is given by the number of features in a task.
+#' The provided functions evaluate such expressions in an environment (dictionary) which holds the following information:
+#' \itemize{
+#'   \item{\code{task}:} the task itself, allowing access to any of its elements
+#'   \item{\code{p}:} the number of features in the task
+#'   \item{\code{n}:} the number of observations in the task
+#'   \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costsens" or "multilabel"
+#'   \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
+#' }
+#' Usually the evaluation of the expression is performed automatically, e.g. in \code{\link{train}} or
+#' \code{\link{tuneParams}}.
+#' Therefore calling \code{evaluateParset} or \code{evaluateLearner} manually should not be necessary.
+#'
 #' @template arg_learner
 #' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr
 #'   Parameter set of (hyper)parameters and their constraints.
@@ -11,64 +26,52 @@
 #' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}].
 #' @name evaluateLearner
 #' @rdname evaluateLearner
-#' @details The expressions can be based on any information provided by the task. For convenience,
-#' the most often used keys are available directly
-#' \itemize{
-#'   \item{\code{task}:} the task itself, allowing to access any of its elements
-#'   \item{\code{p}:} the number of features in the task
-#'   \item{\code{n}:} the number of observations in the task
-#'   \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel"
-#'   \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
-#' }
-#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could
-#' access the "blocking" via \code{task$task.desc$has.blocking}.
+#' @export
 #' @examples
 #' ## (1) evaluation of a learner's hyperparameters
 #' task = makeClassifTask(data = iris, target = "Species")
 #' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p),
 #'   minbucket = expression(3L + 4L * task$task.desc$has.blocking))
 #' lrn2 = evaluateLearner(learner = lrn1, task = task)
-#' 
-#' lrn1$par.vals
-#' lrn2$par.vals
-#' 
+#'
+#' getHyperPars(lrn1)
+#' getHyperPars(lrn2)
+#'
 #' ## (2) evaluation of a learner's entire parameter set
 #' task = makeClassifTask(data = iris, target = "Species")
 #' lrn1 = makeLearner("classif.randomForest")
 #' lrn2 = evaluateLearner(learner = lrn1, task = task)
-#' 
-#' ## focus on the parameters 'mtry', 'classwt' and 'cutoff'
-#' lrn1$par.set
-#' lrn2$par.set
-#' 
+#'
+#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff'
+#' getParamSet(lrn1)
+#' getParamSet(lrn2)
+#'
 #' ## (3) evaluation of a parameter set
 #' task = makeClassifTask(data = iris, target = "Species")
 #' ps1 = makeParamSet(
 #'   makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x),
 #'   makeDiscreteParam("sigma", values = expression(list(k, p)))
 #' )
-#' ps2 = evaluateParset(par.set = ps1, task = task)
-#' @export
+#' evaluateParset(par.set = ps1, task = task)
 evaluateLearner = function(learner, task) {
   dict = makeTaskDictionary(task = task)
   learner$par.set = evaluateParset(learner$par.set, task = task)
-  if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression)))
+  if (any(vlapply(learner$par.vals, is.expression)))
     learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict))
   return(learner)
 }
 
 #' @rdname evaluateLearner
 #' @export
 evaluateParset = function(par.set, task) {
-  dict = makeTaskDictionary(task = task)
   if (hasExpression(par = par.set)) {
+    dict = makeTaskDictionary(task = task)
     checkParamSet(par.set = par.set, dict = dict)
     par.set = evaluateParamSet(par.set = par.set, dict = dict)
     ## assure that the value names are also shown if the values list was unnamed
     par.set$pars = lapply(par.set$pars, function(x) {
-      if (is.null(x$values) || !is.null(names(x$values)))
-        return(x)
-      names(x$values) = unlist(lapply(x$values, function(vals) vals))
+      if (!is.null(x$values) && is.null(names(x$values)))
+        names(x$values) = unlist(x$values)
       return(x)
     })
   }

diff --git a/R/makeLearner.R b/R/makeLearner.R
@@ -43,17 +43,7 @@
 #' @return [\code{\link{Learner}}].
 #' @family learner
 #' @export
-#' @details Note that learners can also contain task dependent expressions, which can be based on any
-#' information provided by the task. For convenience, the most often used keys are available directly
-#' \itemize{
-#'   \item{\code{task}:} the task itself, allowing to access any of its elements
-#'   \item{\code{p}:} the number of features in the task
-#'   \item{\code{n}:} the number of observations in the task
-#'   \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel"
-#'   \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
-#' }
-#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could
-#' access the "blocking" via \code{task$task.desc$has.blocking}.
+#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information.
 #' @aliases Learner
 #' @seealso [\code{\link{resample}}], [\code{\link{predict.WrappedModel}}]
 #' @examples

diff --git a/R/setHyperPars.R b/R/setHyperPars.R
@@ -11,25 +11,14 @@
 #' @note If a named (hyper)parameter can't be found for the given learner, the 3
 #' closest (hyper)parameter names will be output in case the user mistyped.
 #' @export
-#' @details Note that learners can also contain task dependent expressions, which can be based on any
-#' information provided by the task. For convenience, the most often used keys are available directly
-#' \itemize{
-#'   \item{\code{task}:} the task itself, allowing to access any of its elements
-#'   \item{\code{p}:} the number of features in the task
-#'   \item{\code{n}:} the number of observations in the task
-#'   \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel"
-#'   \item{\code{k}:} the number of classes of the target variable (only available for classification tasks)
-#' }
-#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could
-#' access the "blocking" via \code{task$task.desc$has.blocking}.
+#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information.
 #' @family learner
 #' @importFrom utils adist
 #' @examples
 #' cl1 = makeLearner("classif.ksvm", sigma = 1)
 #' cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2))
 #' cl3 = setHyperPars(cl2, C = expression(round(n / p)))
 #' print(cl1)
-#' # note the now set and altered hyperparameters:
 #' print(cl2)
 #' print(cl3)
 setHyperPars = function(learner, ..., par.vals = list()) {

diff --git a/R/train.R b/R/train.R
@@ -30,7 +30,7 @@
 #' print(mod)
 train = function(learner, task, subset, weights = NULL) {
   learner = checkLearner(learner)
-  if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression)))
+  if (hasExpression(learner))
     learner = evaluateLearner(learner = learner, task = task)
   assertClass(task, classes = "Task")
   if (missing(subset)) {

diff --git a/R/tuneParams.R b/R/tuneParams.R
@@ -79,7 +79,7 @@
 tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) {
   learner = checkLearner(learner)
   assertClass(task, classes = "Task")
-  if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression)))
+  if (hasExpression(learner))
     learner = evaluateLearner(learner = learner, task = task)
   measures = checkMeasures(measures, learner)
   assertClass(par.set, classes = "ParamSet")

diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd
diff --git a/man/makeLearner.Rd b/man/makeLearner.Rd
diff --git a/man/setHyperPars.Rd b/man/setHyperPars.Rd
diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateLearner.R
@@ -24,7 +24,7 @@ test_that("expressions in learners", {
   expect_true(is.expression(x1))
   expect_true(!is.expression(x2))
   expect_equal(x2, binaryclass.task$task.desc$has.blocking)
-  
+
   ## (4) expressions within hyperparameters
   x1 = lrn1$par.vals$minsplit
   x2 = lrn2$par.vals$minsplit
@@ -48,9 +48,22 @@ test_that("expressions in parameter sets", {
     makeDiscreteParam("sigma", values = expression(list(p, k)))
   )
   ps2 = evaluateParset(par.set = ps1, task = binaryclass.task)
-  
+
   ## expressions within parameter sets
   expect_equal(ps2$pars$C$lower, 2L)
   expect_equal(ps2$pars$C$upper, 208L)
   expect_equal(ps2$pars$sigma$values, list("60" = 60, "2" = 2))
 })
+
+test_that("tuning works with expressions", {
+  task = multiclass.small.task
+  lrn = makeLearner("classif.rpart")
+  lrn = makeFilterWrapper(lrn, fw.method = "kruskal.test")
+  ps = makeParamSet(
+    makeIntegerParam("fw.abs", lower = 1, upper = expression(ceiling(n/2)))
+  )
+  ctrl = makeTuneControlRandom(maxit = 5)
+  res = tuneParams(lrn, task = task, resampling = hout, par.set = ps, control = ctrl)
+  res = as.data.frame(res$opt.path)
+  expect_integer(res$fw.abs, lower = 1, upper = ceiling(getTaskSize(task)/2), any.missing = FALSE)
+})