diff --git a/docs/index.rst b/docs/index.rst index 3160ac2..edb60d4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,6 +6,7 @@ space conditional + orion/index .. toctree:: diff --git a/docs/orion/builder.rst b/docs/orion/builder.rst new file mode 100644 index 0000000..79b8954 --- /dev/null +++ b/docs/orion/builder.rst @@ -0,0 +1,5 @@ +Space Builder +============= + +.. automodule:: sspace.orion.builder + :members: diff --git a/docs/orion/index.rst b/docs/orion/index.rst new file mode 100644 index 0000000..6e8d719 --- /dev/null +++ b/docs/orion/index.rst @@ -0,0 +1,10 @@ +Orion +===== + +.. toctree:: + :maxdepth: 1 + :caption: Modules + + builder + space + transformer diff --git a/docs/orion/space.rst b/docs/orion/space.rst new file mode 100644 index 0000000..f82786c --- /dev/null +++ b/docs/orion/space.rst @@ -0,0 +1,6 @@ +Space search +============ + +.. automodule:: sspace.orion.space + :members: + diff --git a/docs/orion/transformer.rst b/docs/orion/transformer.rst new file mode 100644 index 0000000..9352637 --- /dev/null +++ b/docs/orion/transformer.rst @@ -0,0 +1,5 @@ +Dimension transformations +========================= + +.. 
automodule:: sspace.orion.transformer + :members: diff --git a/requirements.txt b/requirements.txt index 6ac187d..033c4e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ ConfigSpace -orion +scipy +numpy diff --git a/sspace/backends/ospace.py b/sspace/backends/ospace.py index be813cf..ef29c1f 100644 --- a/sspace/backends/ospace.py +++ b/sspace/backends/ospace.py @@ -1,22 +1,13 @@ -import_error = None +import functools -try: - import functools +from sspace.orion.builder import SpaceBuilder, DimensionBuilder +import sspace.orion.space as OrionSpace - from orion.core.io.space_builder import SpaceBuilder, DimensionBuilder - import orion.algo.space as OrionSpace - - from sspace.utils import sort_dict - -except ImportError as e: - import_error = e +from sspace.utils import sort_dict class _OrionSpaceBuilder: def __init__(self): - if import_error: - raise import_error - self.builder = DimensionBuilder() self.dim_leaves = { 'uniform': self.uniform, diff --git a/sspace/convert.py b/sspace/convert.py index 3aba587..c4b3861 100644 --- a/sspace/convert.py +++ b/sspace/convert.py @@ -4,13 +4,12 @@ import ConfigSpace as cs import ConfigSpace.hyperparameters as csh - from orion.core.io.space_builder import SpaceBuilder, DimensionBuilder - import orion.algo.space as OrionSpace + from sspace.orion.builder import SpaceBuilder, DimensionBuilder + import sspace.orion.space as OrionSpace import functools from sspace.utils import sort_dict - except ImportError as e: import_error = e diff --git a/sspace/orion/__init__.py b/sspace/orion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sspace/orion/builder.py b/sspace/orion/builder.py new file mode 100644 index 0000000..2dc5848 --- /dev/null +++ b/sspace/orion/builder.py @@ -0,0 +1,342 @@ +# -*- coding: utf-8 -*- +# pylint: disable=eval-used,protected-access +""" +Create Space objects from configuration +======================================= + +Functions which build ``Dimension`` and ``Space`` 
objects for defining problem's search space. + +Replace actual hyperparam values in your script's config files or cmd +arguments with orion's keywords for declaring hyperparameter types +to be optimized. + +Motivation for this way of orion's configuration is to achieve as +minimal intrusion to user's workflow as possible by: + + * Offering to user the choice to keep the original way of passing + hyperparameters to their script, be it through some **config file + type** (e.g. yaml, json, ini, etc) or through **command line + arguments**. + + * Instead of passing the actual hyperparameter values, use one of + the characteristic keywords, names enlisted in :scipy.stats:`distributions` + or :class:`orion.core.io.space_builder.DimensionBuilder`, + to describe distributions and declare the hyperparameters + to be optimized. So that a possible command line argument + like ``-lrate0=0.1`` becomes ``-lrate0~'uniform(-3, 1)'``. + +.. note:: + Use ``~`` instead of ``=`` to denote that a variable "draws from" + a distribution. We support limited Python syntax for describing distributions. + + * Module will also use the script's provided input file/args as a + template to fill an appropriate input with proposed values for the + script's execution in each hyperiteration. + +""" +import logging +import re +from collections import OrderedDict + +from scipy.stats import distributions as sp_dists + +from sspace.orion.space import Categorical, Fidelity, Integer, Real, Space +from sspace.orion.utils import flatten + +log = logging.getLogger(__name__) + + +def _check_expr_to_eval(expr): + if "__" in expr or ";" in expr: + raise RuntimeError("Cannot use builtins, '__' or ';'. 
Sorry.") + return + + +def _get_arguments(*args, **kwargs): + return args, kwargs + + +def _real_or_int(kwargs): + return Integer if kwargs.pop("discrete", False) else Real + + +def replace_key_in_order(odict, key_prev, key_after): + """Replace ``key_prev`` of ``OrderedDict`` ``odict`` with ``key_after``, + while leaving its value and the rest of the dictionary intact and in the + same order. + """ + tmp = OrderedDict() + for k, v in odict.items(): + if k == key_prev: + tmp[key_after] = v + else: + tmp[k] = v + return tmp + + +def _should_not_be_built(expression): + return expression.startswith("-") or expression.startswith(">") + + +def _remove_marker(expression, marker="+"): + return ( + expression.replace(marker, "", 1) + if expression.startswith(marker) + else expression + ) + + +class DimensionBuilder(object): + """Create `Dimension` objects using a name for it and a string expression + which encodes prior and dimension information. + + Basically, one must provide a string like a function call to a method that + has the name of a distribution, e.g. ``alpha``, and then provide settings + about that distribution and information about the `Dimension`, if it + cannot be inferred. One example for the latter case is: + + ``uniform(-3, 5)`` will return a :class:`orion.algo.space.Real` dimension, while + ``uniform(-3, 5, discrete=True)`` will return an :class:`orion.algo.space.Integer` dimension. + + Sometimes there is also a separate name for the same distribution in integers, + for the 'uniform' example: + + ``randint(-3, 5)`` will return a uniform :class:`orion.algo.space.Integer` dimension. + + For categorical dimensions, one can use either ``enum`` or ``random`` name. + ``random`` however can be used for uniform reals or integers as well. + + Most names are taken from instances contained in :scipy.stats:`distributions`. + So, if the distribution you are searching for is there, then `DimensionBuilder` + can build one dimension with that prior! 
+ + Examples + -------- + >>> dimbuilder = DimensionBuilder() + >>> dimbuilder.build('learning_rate', 'loguniform(0.001, 1, shape=10)') + Real(name=learning_rate, prior={reciprocal: (0.001, 1), {}}, shape=(10,), default value=None) + >>> dimbuilder.build('something_else', 'poisson(mu=3)') + Integer(name=something_else, prior={poisson: (), {'mu': 3}}, shape=(), default value=None) + >>> dim = dimbuilder.build('other2', 'uniform(-5, 2)') + >>> dim + Real(name=other2, prior={uniform: (-5, 7), {}}, shape=(), default value=None) + >>> dim.interval() + (-5.0, 2.0) + + """ + + def __init__(self): + """Init of `DimensionBuilder`.""" + self.name = None + + def choices(self, *args, **kwargs): + """Create a :class:`orion.algo.space.Categorical` dimension.""" + name = self.name + try: + if isinstance(args[0], (dict, list)): + return Categorical(name, *args, **kwargs) + except IndexError as exc: + raise TypeError( + "Parameter '{}': " "Expected argument with categories.".format(name) + ) from exc + + return Categorical(name, args, **kwargs) + + def fidelity(self, *args, **kwargs): + """Create a :class:`orion.algo.space.Fidelity` dimension.""" + name = self.name + return Fidelity(name, *args, **kwargs) + + def uniform(self, *args, **kwargs): + """Create an :class:`orion.algo.space.Integer` or :class:`orion.algo.space.Real` uniformly + distributed dimension. + + .. note:: Changes scipy convention for uniform's arguments. In scipy, + ``uniform(a, b)`` means uniform in the interval [a, a+b). Here, it + means uniform in the interval [a, b]. + + """ + name = self.name + klass = _real_or_int(kwargs) + if len(args) == 2: + return klass(name, "uniform", args[0], args[1] - args[0], **kwargs) + return klass(name, "uniform", *args, **kwargs) + + def randint(self, *args, **kwargs): + """Create an :class:`orion.algo.space.Integer` or :class:`orion.algo.space.Real` uniformly + distributed dimension. + + .. note:: Changes scipy convention for uniform's arguments. 
In scipy, + ``uniform(a, b)`` means uniform in the interval [a, a+b). Here, it + means uniform in the interval [a, b]. + + """ + raise NotImplementedError( + "`randint` is not supported. Use uniform(discrete=True) instead." + ) + + def gaussian(self, *args, **kwargs): + """Synonym for :scipy.stats:`distributions.norm`.""" + return self.normal(*args, **kwargs) + + def normal(self, *args, **kwargs): + """Another synonym for :scipy.stats:`distributions.norm`.""" + name = self.name + klass = _real_or_int(kwargs) + return klass(name, "norm", *args, **kwargs) + + def loguniform(self, *args, **kwargs): + """Return a `Dimension` object with + :scipy.stats:`distributions.reciprocal` prior distribution. + """ + name = self.name + klass = _real_or_int(kwargs) + return klass(name, "reciprocal", *args, **kwargs) + + def _build(self, name, expression): + """Build a `Dimension` object using a string as its `name` and another + string, `expression`, from configuration as a "function" to create it. + """ + self.name = name + _check_expr_to_eval(expression) + + prior, arg_string = re.findall(r"([a-z][a-z0-9_]*)\((.*)\)", expression)[0] + globals_ = {"__builtins__": {}} + try: + dimension = eval("self." + expression, globals_, {"self": self}) + + return dimension + except AttributeError: + pass + + # If not found in the methods of `DimensionBuilder`. + # try to see if it is legit scipy stuff and call a `Dimension` + # appropriately. 
+ args, kwargs = eval( + "_get_arguments(" + arg_string + ")", + globals_, + {"_get_arguments": _get_arguments}, + ) + + if hasattr(sp_dists._continuous_distns, prior): + klass = _real_or_int(kwargs) + elif hasattr(sp_dists._discrete_distns, prior): + klass = Integer + else: + raise TypeError( + "Parameter '{0}': " + "'{1}' does not correspond to a supported distribution.".format( + name, prior + ) + ) + dimension = klass(name, prior, *args, **kwargs) + + return dimension + + def build(self, name, expression): + """Check ``DimensionBuilder._build`` for documentation. + + .. note:: Warm-up: Fail early if arguments make object not usable. + + """ + try: + dimension = self._build(name, expression) + except ValueError as exc: + raise TypeError( + "Parameter '{}': Incorrect arguments.".format(name) + ) from exc + except IndexError as exc: + error_msg = ( + "Parameter '{0}': Please provide a valid form for prior:\n" + "'distribution(*args, **kwargs)'\n" + "Provided: '{1}'".format(name, expression) + ) + raise TypeError(error_msg) from exc + + try: + dimension.sample() + except TypeError as exc: + error_msg = ( + "Parameter '{0}': Incorrect arguments for distribution '{1}'.\n" + "Scipy Docs::\n\n{2}".format( + name, dimension._prior_name, dimension.prior.__doc__ + ) + ) + raise TypeError(error_msg) from exc + except ValueError as exc: + raise TypeError( + "Parameter '{0}': Incorrect arguments.".format(name) + ) from exc + + return dimension + + +class SpaceBuilder(object): + """Build a :class:`orion.algo.space.Space` object from user's configuration.""" + + def __init__(self): + self.dimbuilder = DimensionBuilder() + self.space = None + + self.commands_tmpl = None + + self.converter = None + self.parser = None + + def build(self, configuration): + """Create a definition of the problem's search space. 
+ + Using information from the user's script configuration (if provided) and the + command line arguments, will create a :class:`orion.algo.space.Space` object defining the + problem's search space. + + Parameters + ---------- + configuration: OrderedDict + An OrderedDict containing the name and the expression of the parameters. + + Returns + ------- + :class:`orion.algo.space.Space` + The problem's search space definition. + + """ + self.space = Space() + for namespace, expression in flatten(configuration).items(): + if _should_not_be_built(expression): + continue + + expression = _remove_marker(expression) + dimension = self.dimbuilder.build(namespace, expression) + + try: + self.space.register(dimension) + except ValueError as exc: + error_msg = "Conflict for name '{}' in parameters".format(namespace) + raise ValueError(error_msg) from exc + + return self.space + + def build_to(self, config_path, trial, experiment=None): + """Create the configuration for the user's script. + + Using the configuration parser, create the commandline associated with the + user's script while replacing the correct instances of parameter distributions by + their actual values. If needed, the parser will also create a configuration file. + + Parameters + ---------- + config_path: str + Path in which the configuration file instance will be created. + trial: `orion.core.worker.trial.Trial` + Object with concrete parameter values for the defined :class:`orion.algo.space.Space`. + experiment: :class:`orion.core.worker.experiment.Experiment`, optional + Object with information related to the current experiment. + + Returns + ------- + list + The commandline arguments that must be given to script for execution. 
+ + """ + return self.parser.format(config_path, trial, experiment) diff --git a/sspace/orion/legacy.py b/sspace/orion/legacy.py new file mode 100644 index 0000000..192d34a --- /dev/null +++ b/sspace/orion/legacy.py @@ -0,0 +1,67 @@ +from sspace.orion.utils import unflatten, flatten + + +class Param: + def __init__(self, name, value, type) -> None: + self.name = name + self.value = value + self.type = type + + +class Trial: + def __init__(self, params) -> None: + self._params = [Param(**param) for param in params] + + @property + def params(self): + return unflatten({param.name: param.value for param in self._params}) + + +class format_trials: + + @staticmethod + def tuple_to_trial(data, space, status="new"): + """Create a `orion.core.worker.trial.Trial` object from `data`. + + Parameters + ---------- + data: tuple + A tuple representing a sample point from `space`. + space: `orion.algo.space.Space` + Definition of problem's domain. + + Returns + ------- + A trial object `orion.core.worker.trial.Trial`. + """ + if len(data) != len(space): + raise ValueError( + f"Data point is not compatible with search space:\ndata: {data}\nspace: {space}" + ) + + params = [] + for i, dim in enumerate(space.values()): + params.append(dict(name=dim.name, type=dim.type, value=data[i])) + + return Trial(params=params) + + @staticmethod + def trial_to_tuple(trial, space): + """Extract a parameter tuple from a `orion.core.worker.trial.Trial`. + + The order within the tuple is dictated by the defined + `orion.algo.space.Space` object. 
+ """ + params = flatten(trial.params) + trial_keys = set(params.keys()) + space_keys = set(space.keys()) + if trial_keys != space_keys: + raise ValueError( + """" +The trial {} has wrong params: +Trial params: {} +Space dims: {}""".format( + trial.id, sorted(trial_keys), sorted(space_keys) + ) + ) + return tuple(params[name] for name in space.keys()) diff --git a/sspace/orion/space.py b/sspace/orion/space.py new file mode 100644 index 0000000..e06370b --- /dev/null +++ b/sspace/orion/space.py @@ -0,0 +1,1099 @@ +# -*- coding: utf-8 -*- +# pylint:disable=too-many-lines +""" +Search space of optimization problems +===================================== + +Classes for representing the search space of an optimization problem. + +There are 3 classes representing possible parameter types. All of them subclass +the base class `Dimension`: + + * `Real` + * `Integer` + * `Categorical` + +These are instantiated to declare a problem's parameter space. Oríon registers +them in an ordered dictionary, `Space`, which describes how the parameters should +be in order for `orion.algo.base.AbstractAlgorithm` implementations to +communicate with `orion.core`. + +Parameter values recorded in `orion.core.worker.trial.Trial` objects must be +and are in concordance with `orion.algo.space` objects. These objects will be +defined by `orion.core` using the user script's configuration file. + +Prior distributions, contained in `Dimension` classes, are based on +:scipy.stats:`distributions` and should be configured as noted in the +scipy documentation for each specific implementation of a random variable type, +unless noted otherwise! 
+ +""" +import copy +import logging +import numbers + +import numpy +from scipy.stats import distributions + +from sspace.orion.utils import flatten, float_to_digits_list +from sspace.orion.legacy import format_trials + +logger = logging.getLogger(__name__) + + +def check_random_state(seed): + """Return numpy global rng or RandomState if seed is specified""" + if seed is None or seed is numpy.random: + rng = ( + numpy.random.mtrand._rand + ) # pylint:disable=protected-access,c-extension-no-member + elif isinstance(seed, numpy.random.RandomState): + rng = seed + else: + try: + rng = numpy.random.RandomState(seed) + except Exception as e: + raise ValueError( + "%r cannot be used to seed a numpy.random.RandomState" + " instance" % seed + ) from e + + return rng + + +# helper class to be able to print [1, ..., 4] instead of [1, '...', 4] +class _Ellipsis: # pylint:disable=too-few-public-methods + def __repr__(self): + return "..." + + +class Dimension: + """Base class for search space dimensions. + + Attributes + ---------- + name : str + Unique identifier for this `Dimension`. + type : str + Identifier for the type of parameters this `Dimension` is representing. + it can be 'real', 'integer', or 'categorical' (name of a subclass). + prior : `scipy.stats.distributions.rv_generic` + A distribution over the original dimension. + shape : tuple + Defines how many dimensions are packed in this `Dimension`. + Describes the shape of the corresponding tensor. + + """ + + NO_DEFAULT_VALUE = None + + def __init__(self, name, prior, *args, **kwargs): + """Init code which is common for `Dimension` subclasses. + + Parameters + ---------- + name : str + Unique identifier associated with this `Dimension`, + e.g. 'learning_rate'. + prior : str | `scipy.stats.distributions.rv_generic` + Corresponds to a name of an instance or an instance itself of + `scipy.stats.distributions.rv_generic`. Basically, + the name of the distribution one wants to use as a :attr:`prior`. 
+ args : list + kwargs : dict + Shape parameter(s) for the `prior` distribution. + Should include all the non-optional arguments. + It may include ``loc``, ``scale``, ``shape``. + + .. seealso:: `scipy.stats.distributions` for possible values of + `prior` and their arguments. + + """ + self._name = None + self.name = name + + if isinstance(prior, str): + self._prior_name = prior + self.prior = getattr(distributions, prior) + elif prior is None: + self._prior_name = "None" + self.prior = prior + else: + self._prior_name = prior.name + self.prior = prior + self._args = args + self._kwargs = kwargs + self._default_value = kwargs.pop("default_value", self.NO_DEFAULT_VALUE) + self._shape = kwargs.pop("shape", None) + self.validate() + + def validate(self): + """Validate dimension arguments""" + if "random_state" in self._kwargs or "seed" in self._kwargs: + raise ValueError( + "random_state/seed cannot be set in a " + "parameter's definition! Set seed globally!" + ) + if "discrete" in self._kwargs: + raise ValueError( + "Do not use kwarg 'discrete' on `Dimension`, " + "use pure `_Discrete` class instead!" + ) + if "size" in self._kwargs: + raise ValueError("Use 'shape' keyword only instead of 'size'.") + + if ( + self.default_value is not self.NO_DEFAULT_VALUE + and self.default_value not in self + ): + raise ValueError( + "{} is not a valid value for this Dimension. 
" + "Can't set default value.".format(self.default_value) + ) + + def _get_hashable_members(self): + return ( + self.name, + self.shape, + self.type, + tuple(self._args), + tuple(self._kwargs.items()), + self.default_value, + self._prior_name, + ) + + # pylint:disable=protected-access + def __eq__(self, other): + """Return True if other is the same dimension as self""" + if not isinstance(other, Dimension): + return False + + return self._get_hashable_members() == other._get_hashable_members() + + def __hash__(self): + """Return the hash of the hashable members""" + return hash(self._get_hashable_members()) + + def sample(self, n_samples=1, seed=None): + """Draw random samples from `prior`. + + Parameters + ---------- + n_samples : int, optional + The number of samples to be drawn. Default is 1 sample. + seed : None | int | ``numpy.random.RandomState`` instance, optional + This parameter defines the RandomState object to use for drawing + random variates. If None (or np.random), the **global** + np.random state is used. If integer, it is used to seed a + RandomState instance **just for the call of this function**. + Default is None. + + Set random state to something other than None for reproducible + results. + + .. warning:: Setting `seed` with an integer will cause the same ndarray + to be sampled if ``n_samples > 0``. Set `seed` with a + ``numpy.random.RandomState`` to carry on the changes in random state + across many samples. + + """ + samples = [ + self.prior.rvs( + *self._args, size=self.shape, random_state=seed, **self._kwargs + ) + for _ in range(n_samples) + ] + return samples + + def cast(self, point): + """Cast a point to dimension's type + + If casted point will stay a list or a numpy array depending on the + given point's type. + """ + raise NotImplementedError + + def interval(self, alpha=1.0): + """Return a tuple containing lower and upper bound for parameters. 
+ + If parameters are drawn from an 'open' supported random variable, + then it will be attempted to calculate the interval from which + a variable is `alpha`-likely to be drawn from. + + """ + return self.prior.interval(alpha, *self._args, **self._kwargs) + + def __contains__(self, point): + """Check if constraints hold for this `point` of `Dimension`. + + :param point: a parameter corresponding to this `Dimension`. + :type point: numeric or array-like + + .. note:: Default `Dimension` does not have any extra constraints. + It just checks whether point lies inside the support and the shape. + + """ + raise NotImplementedError + + def __repr__(self): + """Represent the object as a string.""" + return "{0}(name={1}, prior={{{2}: {3}, {4}}}, shape={5}, default value={6})".format( + self.__class__.__name__, + self.name, + self._prior_name, + self._args, + self._kwargs, + self.shape, + self._default_value, + ) + + def get_prior_string(self): + """Build the string corresponding to current prior""" + args = copy.deepcopy(list(self._args[:])) + if self._prior_name == "uniform" and len(args) == 2: + args[1] = args[0] + args[1] + args[0] = args[0] + + args = list(map(str, args)) + + for k, v in self._kwargs.items(): + if isinstance(v, str): + args += ["{}='{}'".format(k, v)] + else: + args += ["{}={}".format(k, v)] + + if self._shape is not None: + args += ["shape={}".format(self._shape)] + + if self.default_value is not self.NO_DEFAULT_VALUE: + args += ["default_value={}".format(repr(self.default_value))] + + prior_name = self._prior_name + if prior_name == "reciprocal": + prior_name = "loguniform" + + if prior_name == "norm": + prior_name = "normal" + + return "{prior_name}({args})".format( + prior_name=prior_name, args=", ".join(args) + ) + + def get_string(self): + """Build the string corresponding to current dimension""" + return "{name}~{prior}".format(name=self.name, prior=self.get_prior_string()) + + @property + def name(self): + """See `Dimension` attributes.""" + 
return self._name + + @name.setter + def name(self, value): + if isinstance(value, str) or value is None: + self._name = value + else: + raise TypeError( + "Dimension's name must be either string or None. " + "Provided: {}, of type: {}".format(value, type(value)) + ) + + @property + def default_value(self): + """Return the default value for this dimensions""" + return self._default_value + + @property + def type(self): + """See `Dimension` attributes.""" + return self.__class__.__name__.lower() + + @property + def prior_name(self): + """Return the name of the prior""" + return self._prior_name + + @property + def shape(self): + """Return the shape of dimension.""" + # Default shape `None` corresponds to 0-dim (scalar) or shape == (). + # Read about ``size`` argument in + # `scipy.stats._distn_infrastructure.rv_generic._argcheck_rvs` + if self.prior is None: + return None + + _, _, _, size = self.prior._parse_args_rvs( + *self._args, # pylint:disable=protected-access + size=self._shape, + **self._kwargs, + ) + return size + + # pylint:disable=no-self-use + @property + def cardinality(self): + """Return the number of all the possible points from `Dimension`. + The default value is ``numpy.inf``. + """ + return numpy.inf + + +def _is_numeric_array(point): + """Test whether a point is numerical object or an array containing only numerical objects""" + + def _is_numeric(item): + return isinstance(item, (numbers.Number, numpy.ndarray)) + + try: + return numpy.all(numpy.vectorize(_is_numeric)(point)) + except TypeError: + return _is_numeric(point) + + return False + + +class Real(Dimension): + """Search space dimension that can take on any real value. + + Parameters + ---------- + name : str + prior : str + See Parameters of `Dimension.__init__()`. + args : list + kwargs : dict + See Parameters of `Dimension.__init__()` for general. + + Real kwargs (extra) + ------------------- + low : float + Lower bound (inclusive), optional; default ``-numpy.inf``. 
+ high : float: + Upper bound (inclusive), optional; default ``numpy.inf``. + The upper bound must be inclusive because of rounding errors + during optimization which may cause values to round exactly + to the upper bound. + precision : int + Precision, optional; default ``4``. + shape : tuple + Defines how many dimensions are packed in this `Dimension`. + Describes the shape of the corresponding tensor. + + """ + + def __init__(self, name, prior, *args, **kwargs): + self._low = kwargs.pop("low", -numpy.inf) + self._high = kwargs.pop("high", numpy.inf) + if self._high <= self._low: + raise ValueError( + "Lower bound {} has to be less than upper bound {}".format( + self._low, self._high + ) + ) + + precision = kwargs.pop("precision", 4) + if (isinstance(precision, int) and precision > 0) or precision is None: + self.precision = precision + else: + raise TypeError( + "Precision should be a non-negative int or None, " + "instead was {} of type {}.".format(precision, type(precision)) + ) + + super(Real, self).__init__(name, prior, *args, **kwargs) + + def __contains__(self, point): + """Check if constraints hold for this `point` of `Dimension`. + + :param point: a parameter corresponding to this `Dimension`. + :type point: numeric or array-like + + .. note:: Default `Dimension` does not have any extra constraints. + It just checks whether point lies inside the support and the shape. 
+ + """ + if not _is_numeric_array(point): + return False + + low, high = self.interval() + + point_ = numpy.asarray(point) + + if point_.shape != self.shape: + return False + + return numpy.all(point_ >= low) and numpy.all(point_ <= high) + + def get_prior_string(self): + """Build the string corresponding to current prior""" + prior_string = super(Real, self).get_prior_string() + + if self.precision != 4: + return prior_string[:-1] + f", precision={self.precision})" + + return prior_string + + def interval(self, alpha=1.0): + """Return a tuple containing lower and upper bound for parameters. + + If parameters are drawn from an 'open' supported random variable, + then it will be attempted to calculate the interval from which + a variable is `alpha`-likely to be drawn from. + + .. note:: Both lower and upper bounds are inclusive. + + """ + prior_low, prior_high = super(Real, self).interval(alpha) + return (max(prior_low, self._low), min(prior_high, self._high)) + + def sample(self, n_samples=1, seed=None): + """Draw random samples from `prior`. + + .. seealso:: `Dimension.sample` + + """ + samples = [] + for _ in range(n_samples): + for _ in range(4): + sample = super(Real, self).sample(1, seed) + if sample[0] not in self: + nice = False + continue + nice = True + samples.extend(sample) + break + if not nice: + raise ValueError( + "Improbable bounds: (low={0}, high={1}). " + "Please make interval larger.".format(self._low, self._high) + ) + + return samples + + # pylint:disable=no-self-use + def cast(self, point): + """Cast a point to float + + If casted point will stay a list or a numpy array depending on the + given point's type. 
+ """ + casted_point = numpy.asarray(point).astype(float) + + if not isinstance(point, numpy.ndarray): + return casted_point.tolist() + + return casted_point + + @staticmethod + def get_cardinality(shape, interval, precision, prior_name): + """Return the number of all the possible points based and shape and interval""" + if precision is None or prior_name not in ["loguniform", "reciprocal"]: + return numpy.inf + + # If loguniform, compute every possible combinations based on precision + # for each orders of magnitude. + + def format_number(number): + """Turn number into an array of digits, the size of the precision""" + + formated_number = numpy.zeros(precision) + digits_list = float_to_digits_list(number) + lenght = min(len(digits_list), precision) + formated_number[:lenght] = digits_list[:lenght] + + return formated_number + + min_number = format_number(interval[0]) + max_number = format_number(interval[1]) + + # Compute the number of orders of magnitude spanned by lower and upper bounds + # (if lower and upper bounds on same order of magnitude, span is equal to 1) + lower_order = numpy.floor(numpy.log10(numpy.abs(interval[0]))) + upper_order = numpy.floor(numpy.log10(numpy.abs(interval[1]))) + order_span = upper_order - lower_order + 1 + + # Total number of possibilities for an order of magnitude + full_cardinality = 9 * 10 ** (precision - 1) + + def num_below(number): + + return ( + numpy.clip(number, a_min=0, a_max=9) + * 10 ** numpy.arange(precision - 1, -1, -1) + ).sum() + + # Number of values out of lower bound on lowest order of magnitude + cardinality_below = num_below(min_number) + # Number of values out of upper bound on highest order of magnitude. + # Remove 1 to be inclusive. + cardinality_above = full_cardinality - num_below(max_number) - 1 + + # Full cardinality on all orders of magnitude, minus those out of bounds. 
+ cardinality = ( + full_cardinality * order_span - cardinality_below - cardinality_above + ) + return int(cardinality) ** int(numpy.prod(shape) if shape else 1) + + @property + def cardinality(self): + """Return the number of all the possible points from Integer `Dimension`""" + return Real.get_cardinality( + self.shape, self.interval(), self.precision, self._prior_name + ) + + +class _Discrete(Dimension): + def sample(self, n_samples=1, seed=None): + """Draw random samples from `prior`. + + Discretizes with `numpy.floor` the results from `Dimension.sample`. + + .. seealso:: `Dimension.sample` + .. seealso:: Discussion in https://github.com/epistimio/orion/issues/56 + if you want to understand better how this `Integer` diamond inheritance + works. + + """ + samples = super(_Discrete, self).sample(n_samples, seed) + # Making discrete by ourselves because scipy does not use **floor** + return list(map(self.cast, samples)) + + def interval(self, alpha=1.0): + """Return a tuple containing lower and upper bound for parameters. + + If parameters are drawn from an 'open' supported random variable, + then it will be attempted to calculate the interval from which + a variable is `alpha`-likely to be drawn from. + + Bounds are integers. + + .. note:: Both lower and upper bounds are inclusive. + + """ + low, high = super(_Discrete, self).interval(alpha) + try: + int_low = int(numpy.floor(low)) + except OverflowError: # infinity cannot be converted to Python int type + int_low = -numpy.inf + try: + int_high = int(numpy.ceil(high)) + except OverflowError: # infinity cannot be converted to Python int type + int_high = numpy.inf + return (int_low, int_high) + + def __contains__(self, point): + raise NotImplementedError + + +class Integer(Real, _Discrete): + """Search space dimension representing integer values. + + Parameters + ---------- + name : str + prior : str + See Parameters of `Dimension.__init__()`. 
+ args : list + kwargs : dict + See Parameters of `Dimension.__init__()` for general. + + Real kwargs (extra) + ------------------- + low : float + Lower bound (inclusive), optional; default ``-numpy.inf``. + high : float: + Upper bound (inclusive), optional; default ``numpy.inf``. + precision : int + Precision, optional; default ``4``. + shape : tuple + Defines how many dimensions are packed in this `Dimension`. + Describes the shape of the corresponding tensor. + + """ + + def __contains__(self, point): + """Check if constraints hold for this `point` of `Dimension`. + + :param point: a parameter corresponding to this `Dimension`. + :type point: numeric or array-like + + `Integer` will check whether `point` contains only integers. + + """ + if not _is_numeric_array(point): + return False + + point_ = numpy.asarray(point) + if not numpy.all(numpy.equal(numpy.mod(point_, 1), 0)): + return False + + return super(Integer, self).__contains__(point) + + def cast(self, point): + """Cast a point to int + + If casted point will stay a list or a numpy array depending on the + given point's type. + """ + casted_point = numpy.asarray(point).astype(float) + + # Rescale point to make high bound inclusive. 
def _get_shape_cardinality(shape):
    """Return the number of scalar elements described by `shape`.

    Parameters
    ----------
    shape : None, int, numpy integer or iterable of ints
        ``None`` stands for a scalar dimension, an integer for a
        one-dimensional shape of that length, and an iterable for a
        tensor shape.

    Returns
    -------
    int
        Product of all sizes in `shape` as a builtin ``int``; ``1`` for
        ``None`` and for an empty shape.

    """
    if shape is None:
        return 1

    # NumPy integer scalars are not instances of the builtin ``int`` and are
    # not iterable, so they must be recognised explicitly.
    if isinstance(shape, (int, numpy.integer)):
        shape = (shape,)

    shape_cardinality = 1
    for size in shape:
        # Accumulate builtin ints: callers use the result as an exponent and
        # fixed-width NumPy integers would silently overflow there.
        shape_cardinality *= int(size)
    return shape_cardinality
+ + """ + + def __init__(self, name, categories, **kwargs): + if isinstance(categories, dict): + self.categories = tuple(categories.keys()) + self._probs = tuple(categories.values()) + else: + self.categories = tuple(categories) + self._probs = tuple(numpy.tile(1.0 / len(categories), len(categories))) + + # Just for compatibility; everything should be `Dimension` to let the + # `Transformer` decorators be able to wrap smoothly anything. + prior = distributions.rv_discrete( + values=(list(range(len(self.categories))), self._probs) + ) + super(Categorical, self).__init__(name, prior, **kwargs) + + @staticmethod + def get_cardinality(shape, categories): + """Return the number of all the possible points based and shape and categories""" + return len(categories) ** _get_shape_cardinality(shape) + + @property + def cardinality(self): + """Return the number of all the possible values from Categorical `Dimension`""" + return Categorical.get_cardinality(self.shape, self.interval()) + + def sample(self, n_samples=1, seed=None): + """Draw random samples from `prior`. + + .. seealso:: `Dimension.sample` + + """ + rng = check_random_state(seed) + cat_ndarray = numpy.array(self.categories, dtype=object) + samples = [ + rng.choice(cat_ndarray, p=self._probs, size=self._shape) + for _ in range(n_samples) + ] + return samples + + def interval(self, alpha=1.0): + """Return a tuple of possible values that this categorical dimension can take.""" + return self.categories + + def __contains__(self, point): + """Check if constraints hold for this `point` of `Dimension`. + + :param point: a parameter corresponding to this `Dimension`. 
+ :type point: numeric or array-like + + """ + point_ = numpy.asarray(point, dtype=object) + if point_.shape != self.shape: + return False + _check = numpy.vectorize(lambda x: x in self.categories) + return numpy.all(_check(point_)) + + def __repr__(self): + """Represent the object as a string.""" + if len(self.categories) > 5: + cats = self.categories[:2] + self.categories[-2:] + probs = self._probs[:2] + self._probs[-2:] + prior = list(zip(cats, probs)) + prior.insert(2, _Ellipsis()) + else: + cats = self.categories + probs = self._probs + prior = list(zip(cats, probs)) + + prior = map( + lambda x: "{0[0]}: {0[1]:.2f}".format(x) + if not isinstance(x, _Ellipsis) + else str(x), + prior, + ) + + prior = "{" + ", ".join(prior) + "}" + + return "Categorical(name={0}, prior={1}, shape={2}, default value={3})".format( + self.name, prior, self.shape, self.default_value + ) + + def get_prior_string(self): + """Build the string corresponding to current prior""" + args = list(map(str, self._args[:])) + args += ["{}={}".format(k, v) for k, v in self._kwargs.items()] + if self.default_value is not self.NO_DEFAULT_VALUE: + args += ["default_value={}".format(self.default_value)] + + cats = [repr(c) for c in self.categories] + if all(p == self._probs[0] for p in self._probs): + prior = "[{}]".format(", ".join(cats)) + else: + probs = list(zip(cats, self._probs)) + prior = "{" + ", ".join("{0}: {1:.2f}".format(c, p) for c, p in probs) + "}" + + args = [prior] + + if self._shape is not None: + args += ["shape={}".format(self._shape)] + if self.default_value is not self.NO_DEFAULT_VALUE: + args += ["default_value={}".format(repr(self.default_value))] + + return "choices({args})".format(args=", ".join(args)) + + @property + def get_prior(self): + """Return the priors""" + return self._probs + + @property + def prior_name(self): + """Return the name of the prior""" + return "choices" + + def cast(self, point): + """Cast a point to some category + + Casted point will stay a list or a 
class Fidelity(Dimension):
    """Fidelity `Dimension` for representing multi-fidelity.

    Fidelity dimensions are not optimized by the algorithms. If it supports multi-fidelity, the
    algorithm will select a fidelity level for which it will sample hyper-parameter values to
    explore a low fidelity space. This class is used as a place-holder so that algorithms can
    discern fidelity dimensions from hyper-parameter dimensions.

    Parameters
    ----------
    name : str
        Name of the dimension
    low: int
        Minimum of the fidelity interval.
    high: int
        Maximum of the fidelity interval.
    base: int
        Base logarithm of the fidelity dimension.

    Attributes
    ----------
    name : str
        Name of the dimension
    default_value: int
        Maximum of the fidelity interval.

    """

    # pylint:disable=super-init-not-called
    def __init__(self, name, low, high, base=2):
        # `low` is stored as an integer, so a fractional value in (0, 1) would
        # be truncated to 0; reject it with the same error as non-positive input.
        if low <= 0 or int(low) <= 0:
            raise AttributeError("Minimum resources must be a positive number.")
        elif low > high:
            raise AttributeError(
                "Minimum resources must be smaller than maximum resources."
            )
        if base < 1:
            raise AttributeError("Base should be greater than or equal to 1")
        self.name = name
        self.low = int(low)
        self.high = int(high)
        self.base = int(base)
        self.prior = None
        self._prior_name = "None"

    @property
    def default_value(self):
        """Return `high`"""
        return self.high

    @staticmethod
    def get_cardinality(shape, interval):
        """Return cardinality of Fidelity dimension, leave it to 1 as Fidelity dimension
        does not contribute to cardinality in a fixed way now.
        """
        return 1

    @property
    def cardinality(self):
        """Return cardinality of Fidelity dimension, leave it to 1 as Fidelity dimension
        does not contribute to cardinality in a fixed way now.
        """
        # NOTE(review): `self.shape` is not set by this class; it must come from
        # `Dimension`, whose `__init__` is skipped here -- confirm it is usable.
        return Fidelity.get_cardinality(self.shape, self.interval())

    def get_prior_string(self):
        """Build the string corresponding to current prior"""
        args = [str(self.low), str(self.high)]

        # The base is only spelled out when it differs from the default of 2.
        if self.base != 2:
            args += [f"base={self.base}"]

        return "fidelity({})".format(", ".join(args))

    def validate(self):
        """Not supported for fidelity dimensions; always raise `NotImplementedError`."""
        raise NotImplementedError

    def sample(self, n_samples=1, seed=None):
        """Return a list of `n_samples` copies of `high`; `seed` is ignored."""
        return [self.high for i in range(n_samples)]

    def interval(self, alpha=1.0):
        """Return the tuple ``(low, high)``; `alpha` is ignored."""
        return (self.low, self.high)

    def cast(self, point=0):
        """Not supported for fidelity dimensions; always raise `NotImplementedError`."""
        raise NotImplementedError

    def __repr__(self):
        """Represent the object as a string."""
        return "{0}(name={1}, low={2}, high={3}, base={4})".format(
            self.__class__.__name__, self.name, self.low, self.high, self.base
        )

    def __contains__(self, value):
        """Check whether `value` lies within ``[low, high]`` (both bounds inclusive).

        :param value: a fidelity level to test against this `Dimension`.
        :type value: numeric
        """
        return self.low <= value <= self.high
+ The dimensions are sorted based on their names. + """ + + contains = Dimension + + def register(self, dimension): + """Register a new dimension to `Space`.""" + self[dimension.name] = dimension + + def sample(self, n_samples=1, seed=None): + """Draw random samples from this space. + + Parameters + ---------- + n_samples : int, optional + The number of samples to be drawn. Default is 1 sample. + seed : None | int | ``numpy.random.RandomState`` instance, optional + This parameter defines the RandomState object to use for drawing + random variates. If None (or np.random), the **global** + np.random state is used. If integer, it is used to seed a + RandomState instance **just for the call of this function**. + Default is None. + + Set random state to something other than None for reproducible + results. + + Returns + ------- + trials: list of `orion.core.worker.trial.Trial` + Each element is a separate sample of this space, a trial containing + values associated with the corresponding dimension. + + """ + rng = check_random_state(seed) + samples = [dim.sample(n_samples, rng) for dim in self.values()] + return [format_trials.tuple_to_trial(point, self) for point in zip(*samples)] + + def interval(self, alpha=1.0): + """Return a list with the intervals for each contained dimension.""" + res = list() + for dim in self.values(): + if dim.type == "categorical": + res.append(dim.categories) + else: + res.append(dim.interval(alpha)) + return res + + def __getitem__(self, key): + """Wrap __getitem__ to allow searching with position.""" + if isinstance(key, str): + return super(Space, self).__getitem__(key) + + values = list(self.values()) + return values[key] + + def __setitem__(self, key, value): + """Wrap __setitem__ to allow only ``Space.contains`` class, e.g. `Dimension`, + values and string keys. + """ + if not isinstance(key, str): + raise TypeError( + "Keys registered to {} must be string types. 
" + "Provided: {}".format(self.__class__.__name__, key) + ) + if not isinstance(value, self.contains): + raise TypeError( + "Values registered to {} must be {} types. " + "Provided: {}".format( + self.__class__.__name__, self.contains.__name__, value + ) + ) + if key in self: + raise ValueError( + "There is already a Dimension registered with this name. " + "Register it with another name. Provided: {}".format(key) + ) + super(Space, self).__setitem__(key, value) + + def __contains__(self, key_or_trial): + """Check whether `trial` is within the bounds of the space. + Or check if a name for a dimension is registered in this space. + + Parameters + ---------- + key_or_trial: str or `orion.core.worker.trial.Trial` + If str, test if the string is a dimension part of the search space. + If a Trial, test if trial's hyperparameters fit the current search space. + """ + if isinstance(key_or_trial, str): + return super(Space, self).__contains__(key_or_trial) + + if hasattr(key_or_trial, 'params'): + trial = key_or_trial + flattened_params = flatten(trial.params) + + else: + flattened_params = flatten(key_or_trial) + + keys = set(flattened_params.keys()) + for dim_name, dim in self.items(): + if dim_name not in keys or flattened_params[dim_name] not in dim: + return False + + keys.remove(dim_name) + + return len(keys) == 0 + + def __repr__(self): + """Represent as a string the space and the dimensions it contains.""" + dims = list(self.values()) + return "Space([{}])".format(",\n ".join(map(str, dims))) + + def items(self): + """Return items sorted according to keys""" + return [(k, self[k]) for k in self.keys()] + + def values(self): + """Return values sorted according to keys""" + return [self[k] for k in self.keys()] + + def keys(self): + """Return sorted keys""" + return list(iter(self)) + + def __iter__(self): + """Return sorted keys""" + return iter(sorted(super(Space, self).keys())) + + @property + def configuration(self): + """Return a dictionary of priors.""" + 
return {name: dim.get_prior_string() for name, dim in self.items()} + + @property + def cardinality(self): + """Return the number of all all possible sets of samples in the space""" + capacities = 1 + for dim in self.values(): + capacities *= dim.cardinality + return capacities diff --git a/sspace/orion/transformer.py b/sspace/orion/transformer.py new file mode 100644 index 0000000..16a795c --- /dev/null +++ b/sspace/orion/transformer.py @@ -0,0 +1,901 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-lines +""" +Perform transformations on Dimensions +===================================== + +Provide functions and classes to build a Space which an algorithm can operate on. + +""" +import copy +import functools +import itertools +from abc import ABCMeta, abstractmethod + +import numpy + +from sspace.orion.space import Categorical, Dimension, Fidelity, Integer, Real, Space +from sspace.orion.utils import flatten +from sspace.orion.legacy import format_trials + +NON_LINEAR = ["loguniform", "reciprocal"] + + +# pylint: disable=unused-argument +@functools.singledispatch +def build_transform(dim, type_requirement, dist_requirement): + """Base transformation factory + + Parameters + ---------- + dim: `orion.algo.space.Dimension` + A dimension object which may need transformations to match provided requirements. + type_requirement: str, None + String defining the requirement of the algorithm. It can be one of the following + - 'real', the dim should be transformed so type is `orion.algo.space.Real` + - 'integer', the dim should be transformed so type is `orion.algo.space.Integer` + - 'numerical', the dim should be transformed so type is either `orion.algo.space.Integer` or + `orion.algo.space.Real` + - None, no requirement + dist_requirement: str, None + String defining the distribution requirement of the algorithm. 
+ - 'linear', any dimension with logarithmic prior while be linearized + - None, no requirement + + """ + return [] + + +@build_transform.register(Categorical) +def _(dim, type_requirement, dist_requirement): + transformers = [] + if type_requirement == "real": + transformers.extend( + [Enumerate(dim.categories), OneHotEncode(len(dim.categories))] + ) + elif type_requirement in ["integer", "numerical"]: + transformers.append(Enumerate(dim.categories)) + + return transformers + + +@build_transform.register(Fidelity) +def _(dim, type_requirement, dist_requirement): + return [] + + +@build_transform.register(Integer) +def _(dim, type_requirement, dist_requirement): + transformers = [] + if dist_requirement == "linear" and dim.prior_name[4:] in NON_LINEAR: + transformers.extend([Reverse(Quantize()), Linearize()]) + # NOTE: we do not turn back to integer even though linearize outputs real + # otherwise the mapping from exp(int) to int squashes out lots of possible values. + elif type_requirement == "real": + transformers.append(Reverse(Quantize())) + + return transformers + + +@build_transform.register(Real) +def _(dim, type_requirement, dist_requirement): + transformers = [] + if dim.precision is not None: + transformers.append(Precision(dim.precision)) + + if dist_requirement == "linear" and dim.prior_name in NON_LINEAR: + transformers.append(Linearize()) + elif type_requirement == "integer": + # NOTE: This may cause out-of-bound errors for rounded reals. Not fixed for now + # because there are no foreseeable algorithms that may require integer type. 
+ transformers.append(Quantize()) + + return transformers + + +def transform(original_space, type_requirement, dist_requirement): + """Build a transformed space""" + space = TransformedSpace(original_space) + for dim in original_space.values(): + transformers = build_transform(dim, type_requirement, dist_requirement) + space.register( + TransformedDimension( + transformer=Compose(transformers, dim.type), original_dimension=dim + ) + ) + + return space + + +def reshape(space, shape_requirement): + """Build a reshaped space""" + if shape_requirement is None: + return space + + # We assume shape_requirement == 'flattened' + + reshaped_space = ReshapedSpace(space) + + for dim_index, dim in enumerate(space.values()): + if not dim.shape: + reshaped_space.register( + ReshapedDimension( + transformer=Identity(dim.type), + original_dimension=dim, + index=dim_index, + ) + ) + else: + for index in itertools.product(*map(range, dim.shape)): + key = f'{dim.name}[{",".join(map(str, index))}]' + reshaped_space.register( + ReshapedDimension( + transformer=View(dim.shape, index, dim.type), + original_dimension=dim, + name=key, + index=dim_index, + ) + ) + + return reshaped_space + + +def build_required_space( + original_space, type_requirement=None, shape_requirement=None, dist_requirement=None +): + """Build a :class:`orion.algo.space.Space` object which agrees to the `requirements` imposed + by the desired optimization algorithm. + + It uses appropriate cascade of `Transformer` objects per `orion.algo.space.Dimension` + contained in `original_space`. `ReshapedTransformer` objects are used above + the `Transformer` if the optimizatios algorithm requires flattened dimensions. + + Parameters + ---------- + original_space : `orion.algo.space.Space` + Original problem's definition of parameter space given by the user to Oríon. + type_requirement: str, None + String defining the requirement of the algorithm. 
class Transformer(object, metaclass=ABCMeta):
    """Define an (injective) function and its inverse. Base transformation class.

    Attributes
    ----------
    target_type: str
        Defines the type of the target space of the forward function.
        It can provide one of the values: ``['real', 'integer', 'categorical']``.
    domain_type: str
        Is similar to ``target_type`` but it refers to the domain.
        If it is ``None``, then it can receive inputs of any type.

    """

    domain_type = None
    target_type = None

    @abstractmethod
    def transform(self, point):
        """Transform a point from domain dimension to the target dimension."""
        pass

    @abstractmethod
    def reverse(self, transformed_point, index=None):
        """Reverse transform a point from target dimension to the domain dimension."""
        pass

    # pylint:disable=no-self-use
    def infer_target_shape(self, shape):
        """Return the shape of the dimension after transformation."""
        return shape

    def repr_format(self, what):
        """Format a string for calling ``__repr__`` in `TransformedDimension`."""
        return "{}({})".format(self.__class__.__name__, what)

    def _get_hashable_members(self):
        """Return the tuple of values that identifies this transformer."""
        return (self.__class__.__name__, self.domain_type, self.target_type)

    # pylint:disable=protected-access
    def __eq__(self, other):
        """Return True if `other` is a transformer with the same hashable members."""
        if not isinstance(other, Transformer):
            return False
        return self._get_hashable_members() == other._get_hashable_members()

    def __hash__(self):
        """Hash the same members that `__eq__` compares.

        Defining `__eq__` alone sets `__hash__` to ``None``, which would make
        transformers unusable in sets or as dictionary keys.
        """
        return hash(self._get_hashable_members())
Everything as it is.""" + + def __init__(self, domain_type=None): + self._domain_type = domain_type + + @property + def first(self): + """Signals to ReshapedSpace whether this dimension should be used for `reverse`""" + return True + + def transform(self, point): + """Return `point` as it is.""" + return point + + # pylint:disable=unused-argument + def reverse(self, transformed_point, index=None): + """Return `transformed_point` as it is.""" + if index is not None: + return transformed_point[index] + return transformed_point + + def repr_format(self, what): + """Format a string for calling ``__repr__`` in `TransformedDimension`.""" + return what + + @property + def domain_type(self): + """Return declared domain type on initialization.""" + return self._domain_type + + @property + def target_type(self): + """Return domain type as this will be the target in a identity transformation.""" + return self.domain_type + + +class Compose(Transformer): + """Initialize composite transformer with a list of `Transformer` objects + and domain type on which it will be applied. 
+ """ + + def __init__(self, transformers, base_domain_type=None): + try: + self.apply = transformers[-1] + except IndexError: + self.apply = Identity() + if len(transformers) > 1: + self.composition = Compose(transformers[:-1], base_domain_type) + else: + self.composition = Identity(base_domain_type) + assert ( + self.apply.domain_type is None + or self.composition.target_type == self.apply.domain_type + ) + + def transform(self, point): + """Apply transformers in the increasing order of the `transformers` list.""" + point = self.composition.transform(point) + return self.apply.transform(point) + + # pylint:disable=unused-argument + def reverse(self, transformed_point, index=None): + """Reverse transformation by reversing in the opposite order of the `transformers` list.""" + transformed_point = self.apply.reverse(transformed_point) + return self.composition.reverse(transformed_point) + + def interval(self, alpha=1.0): + """Return interval of composed transformation.""" + if hasattr(self.apply, "interval"): + return self.apply.interval(alpha) + + return None + + def infer_target_shape(self, shape): + """Return the shape of the dimension after transformation.""" + shape = self.composition.infer_target_shape(shape) + return self.apply.infer_target_shape(shape) + + def repr_format(self, what): + """Format a string for calling ``__repr__`` in `TransformedDimension`.""" + return self.apply.repr_format(self.composition.repr_format(what)) + + @property + def domain_type(self): + """Return base domain type.""" + return self.composition.domain_type + + @property + def target_type(self): + """Infer type of the tranformation target.""" + type_before = self.composition.target_type + type_after = self.apply.target_type + return type_after if type_after else type_before + + # pylint:disable=protected-access + def _get_hashable_members(self): + return ( + (self.__class__.__name__,) + + self.apply._get_hashable_members() + + self.composition._get_hashable_members() + ) + + +class 
Reverse(Transformer): + """Apply the reverse transformation that another one would do.""" + + def __init__(self, transformer: Transformer): + assert not isinstance( + transformer, OneHotEncode + ), "real to categorical is pointless" + self.transformer = transformer + + def transform(self, point): + """Use `reserve` of composed `transformer`.""" + return self.transformer.reverse(point) + + # pylint:disable=unused-argument + def reverse(self, transformed_point, index=None): + """Use `transform` of composed `transformer`.""" + return self.transformer.transform(transformed_point) + + def repr_format(self, what): + """Format a string for calling ``__repr__`` in `TransformedDimension`.""" + return "{}{}".format( + self.__class__.__name__, self.transformer.repr_format(what) + ) + + @property + def target_type(self): + """Return `domain_type` of composed `transformer`.""" + return self.transformer.domain_type + + @property + def domain_type(self): + """Return `target_type` of composed `transformer`.""" + return self.transformer.target_type + + +class Precision(Transformer): + """Round real numbers to requested precision.""" + + domain_type = "real" + target_type = "real" + + def __init__(self, precision=4): + self.precision = precision + + def transform(self, point): + """Round `point` to the requested precision, as numpy arrays.""" + # numpy.format_float_scientific precision starts at 0 + if isinstance(point, (list, tuple)) or ( + isinstance(point, numpy.ndarray) and point.shape + ): + format_float = numpy.vectorize( + lambda x: numpy.format_float_scientific(x, precision=self.precision - 1) + ) + point = format_float(point) + to_float = numpy.vectorize(float) + point = to_float(point) + else: + point = float( + numpy.format_float_scientific(point, precision=self.precision - 1) + ) + + return numpy.asarray(point) + + # pylint:disable=unused-argument + def reverse(self, transformed_point, index=None): + """Cast `transformed_point` to floats, as numpy arrays.""" + return 
class Quantize(Transformer):
    """Transform real numbers to integers, violating injection."""

    domain_type = "real"
    target_type = "integer"

    def transform(self, point):
        """Round `point` and then cast to integers, as numpy arrays.

        Infinite entries have no integer representation. They are mapped to the
        largest magnitude of the resulting integer dtype, keeping their sign, so
        that unbounded interval limits stay ordered after the transformation.
        Works element-wise for scalars and arrays alike.
        """
        values = numpy.asarray(point)
        infinite = numpy.isinf(values)

        if not numpy.any(infinite):
            return numpy.round(values).astype(int)

        # Casting an infinity to an integer is undefined, so neutralise the
        # infinite entries before the cast and fill them in explicitly after.
        quantized = numpy.round(numpy.where(infinite, 0, values)).astype(int)
        bound = numpy.iinfo(quantized.dtype).max
        return numpy.where(
            infinite, numpy.where(values > 0, bound, -bound), quantized
        )

    # pylint:disable=unused-argument
    def reverse(self, transformed_point, index=None):
        """Cast `transformed_point` to floats, as numpy arrays."""
        return numpy.asarray(transformed_point).astype(float)
class OneHotEncode(Transformer):
    """Encode categories to a 1-hot integer space representation."""

    domain_type = "integer"
    target_type = "real"

    def __init__(self, bound: int):
        self.num_cats = bound

    def transform(self, point):
        """Match a `point` containing integers to real vector representations of them.

        If the upper bound of integers supported by an instance of `OneHotEncode`
        is less or equal to 2, then cast them to floats.

        .. note:: This transformation possibly appends one more tensor dimension to `point`.

        Raises
        ------
        AssertionError
            If `point` holds values that are not integers in ``[0, num_cats)``.
        """
        point_ = numpy.asarray(point)
        # Kept as assertions so the exception type on invalid input is unchanged.
        assert (
            numpy.all(point_ < self.num_cats)
            and numpy.all(point_ >= 0)
            and numpy.all(point_ % 1 == 0)
        )

        if self.num_cats <= 2:
            return numpy.asarray(point_, dtype=float)

        hot = numpy.zeros(self.infer_target_shape(point_.shape))
        grid = numpy.meshgrid(
            *[numpy.arange(dim) for dim in point_.shape], indexing="ij"
        )
        # A multidimensional index must be a tuple: a list of arrays is read as
        # a single array index, and `meshgrid` may itself return a tuple.
        # The category axis is cast to int because integral-valued floats pass
        # the check above but are not valid indices.
        hot[tuple(grid) + (point_.astype(int),)] = 1
        return hot

    # pylint:disable=unused-argument
    def reverse(self, transformed_point, index=None):
        """Match real vector representations to integers using an argmax function.

        If the number of dimensions is exactly 2, then use 0.5 as a decision boundary,
        and convert representation to integers 0 or 1.

        If the number of dimensions is exactly 1, then return zeros.

        .. note:: This reverse transformation possibly removes the last tensor dimension
           from `transformed_point`.
        """

        point_ = numpy.asarray(transformed_point)
        if self.num_cats == 2:
            return (point_ > 0.5).astype(int)
        elif self.num_cats == 1:
            return numpy.zeros_like(point_, dtype=int)

        assert point_.shape[-1] == self.num_cats
        return point_.argmax(axis=-1)

    # pylint:disable=unused-argument
    def interval(self, alpha=1.0):
        """Return the interval for the one-hot encoding in proper shape."""
        if self.num_cats == 2:
            return 0, 1
        else:
            low = numpy.zeros(self.num_cats)
            high = numpy.ones(self.num_cats)

        return low, high

    def infer_target_shape(self, shape):
        """Infer that transformed points will have one more tensor dimension,
        if the number of supported integers to transform is larger than 2.
        """
        return tuple(list(shape) + [self.num_cats]) if self.num_cats > 2 else shape

    def _get_hashable_members(self):
        """Extend the base members with the number of categories."""
        return super(OneHotEncode, self)._get_hashable_members() + (self.num_cats,)
class TransformedDimension(object):
    """Dimension look-alike that pairs an original dimension with a `Transformer`.

    Duck-types :class:`orion.algo.space.Dimension`: attribute and method
    lookups are answered either by the wrapped ``original_dimension`` or by
    passing its values through ``transformer``.
    """

    NO_DEFAULT_VALUE = Dimension.NO_DEFAULT_VALUE

    def __init__(self, transformer, original_dimension):
        self.transformer = transformer
        self.original_dimension = original_dimension

    def transform(self, point):
        """Delegate to ``transformer.transform``."""
        return self.transformer.transform(point)

    # pylint:disable=unused-argument
    def reverse(self, transformed_point, index=None):
        """Delegate to ``transformer.reverse``; ``index`` is not forwarded."""
        return self.transformer.reverse(transformed_point)

    def interval(self, alpha=1.0):
        """Return the bounds of this dimension in the transformed domain.

        A truthy result of the transformer's own ``interval()`` (when it has
        one) takes precedence. Otherwise categorical dimensions return their
        categories, and any other dimension returns its original bounds at
        ``alpha`` passed through ``transform``.
        """
        transformer = self.transformer
        if hasattr(transformer, "interval"):
            bounds = transformer.interval()
            if bounds:
                return bounds

        original = self.original_dimension
        if original.type == "categorical":
            return original.categories

        low, high = original.interval(alpha)
        return self.transform(low), self.transform(high)

    def __contains__(self, point):
        """Tell whether the reversed ``point`` belongs to the original dimension.

        An ``AssertionError`` raised while reversing means the point is not
        a valid sample, so ``False`` is returned.
        """
        try:
            original_point = self.reverse(point)
        except AssertionError:
            return False
        return original_point in self.original_dimension

    def __repr__(self):
        """Let the transformer format the repr of the original dimension."""
        original_repr = repr(self.original_dimension)
        return self.transformer.repr_format(original_repr)

    # pylint:disable=protected-access
    def __eq__(self, other):
        """Equal when ``other`` has an equal transformer and original dimension."""
        if not hasattr(other, "transformer") or not hasattr(
            other, "original_dimension"
        ):
            return False

        if self.transformer == other.transformer:
            return self.original_dimension == other.original_dimension
        return False

    def __hash__(self):
        """Hash built from the members returned by ``_get_hashable_members``."""
        members = self._get_hashable_members()
        return hash(members)

    # pylint:disable=protected-access
    def _get_hashable_members(self):
        """Concatenate hashable members of the transformer and the original dimension."""
        transformer_members = self.transformer._get_hashable_members()
        dimension_members = self.original_dimension._get_hashable_members()
        return transformer_members + dimension_members

    def validate(self):
        """Run the original dimension's validation."""
        self.original_dimension.validate()

    @property
    def name(self):
        """Name of the original dimension, left unchanged."""
        return self.original_dimension.name

    @property
    def type(self):
        """Target type of the transformer, or the original type if ``"invariant"``."""
        target = self.transformer.target_type
        if target == "invariant":
            return self.original_dimension.type
        return target

    @property
    def prior_name(self):
        """Prior name of the original dimension, left unchanged."""
        return self.original_dimension.prior_name

    @property
    def shape(self):
        """Shape of the original dimension as seen through the transformer."""
        original_shape = self.original_dimension.shape
        return self.transformer.infer_target_shape(original_shape)

    @property
    def cardinality(self):
        """Number of possible values of this dimension.

        Integer targets (possibly a discretized real) are counted from the
        transformed shape and interval; every other target keeps the
        cardinality of the original dimension.
        """
        if self.type != "integer":
            # The transformation does not change how many values exist.
            return self.original_dimension.cardinality

        # May be a discretized real, must reduce cardinality
        return Integer.get_cardinality(self.shape, self.interval())

    @property
    def default_value(self):
        """Transformed default value, or ``NO_DEFAULT_VALUE`` when none is set."""
        original = self.original_dimension
        default = original.default_value
        if default is original.NO_DEFAULT_VALUE:
            return self.NO_DEFAULT_VALUE

        return self.transform(default)
class TransformedSpace(Space):
    """Wrap the :class:`orion.algo.space.Space` to support transformation methods.

    Parameters
    ----------
    space: `orion.algo.space.Space`
       Original problem's definition of parameter space.

    """

    contains = TransformedDimension

    def __init__(self, space, *args, **kwargs):
        super(TransformedSpace, self).__init__(*args, **kwargs)
        self._original_space = space

    def transform(self, trial):
        """Transform a trial from the original space into this space.

        Each dimension transforms the flattened parameter stored under its
        own name; the result is a copy of ``trial`` with the new params.
        """
        # Flatten once up front: the flattened params do not depend on the
        # dimension being processed, so redoing it per dimension is wasted work.
        flat_params = flatten(trial.params)
        transformed_point = tuple(
            dim.transform(flat_params[name]) for name, dim in self.items()
        )

        return change_trial_params(trial, transformed_point, self)

    def reverse(self, transformed_trial):
        """Reverse the transformation of a trial from this `TransformedSpace`.

        Each dimension reverses the flattened parameter stored under its own
        name; the result is a copy of ``transformed_trial`` with the new params.
        """
        # Same hoisting as in ``transform``: flatten the params a single time.
        flat_params = flatten(transformed_trial.params)
        reversed_point = tuple(
            dim.reverse(flat_params[name]) for name, dim in self.items()
        )

        return change_trial_params(
            transformed_trial,
            reversed_point,
            self,
        )

    def sample(self, n_samples=1, seed=None):
        """Sample from the original space and forward-transform every trial."""
        trials = self._original_space.sample(n_samples=n_samples, seed=seed)
        return [self.transform(trial) for trial in trials]
+ + """ + + contains = ReshapedDimension + + def __init__(self, original_space, *args, **kwargs): + super(ReshapedSpace, self).__init__(*args, **kwargs) + self._original_space = original_space + + @property + def original(self): + """Original space without reshape or transformations""" + return self._original_space + + def transform(self, trial): + """Transform a point that was in the original space to be in this one.""" + return self.reshape(self.original.transform(trial)) + + def reverse(self, transformed_trial): + """Reverses transformation so that a point from this `ReshapedSpace` to be in the original + one. + """ + return self.original.reverse(self.restore_shape(transformed_trial)) + + def reshape(self, trial): + """Reshape the point""" + point = format_trials.trial_to_tuple(trial, self._original_space) + reshaped_point = [] + for dim in self.values(): + reshaped_point.append(dim.transform(point[dim.index])) + + return change_trial_params(trial, reshaped_point, self) + + def restore_shape(self, transformed_trial): + """Restore shape.""" + transformed_point = format_trials.trial_to_tuple(transformed_trial, self) + original_keys = self._original_space.keys() + point = [None for _ in original_keys] + for index, dim in enumerate(self.values()): + if dim.first: + point_index = original_keys.index(dim.original_dimension.name) + point[point_index] = dim.reverse(transformed_point, index) + + return change_trial_params(transformed_trial, point, self._original_space) + + def sample(self, n_samples=1, seed=None): + """Sample from the original dimension and forward transform them.""" + trials = self.original.sample(n_samples=n_samples, seed=seed) + return [self.reshape(trial) for trial in trials] + + def __contains__(self, key_or_trial): + """Check whether `trial` is within the bounds of the space. + Or check if a name for a dimension is registered in this space. 
import copy


def flatten(dictionary):
    """Turn all nested dict keys into a ``{key}.{subkey}`` format.

    Non-empty nested dictionaries are expanded; every other value, including
    an empty dict, is kept as a leaf. The input is deep-copied first, so the
    result shares no mutable values with it.

    Parameters
    ----------
    dictionary: dict
        Possibly nested dictionary. Keys below the top level, and top-level
        keys that hold a non-empty dict, must be strings.

    Returns
    -------
    dict
        Flat dictionary. Keys are emitted depth-first in reverse insertion
        order, which is kept for backward compatibility.

    """
    dictionary = copy.deepcopy(dictionary)
    if dictionary == {}:
        return dictionary

    flat = {}
    for key, value in reversed(list(dictionary.items())):
        if isinstance(value, dict) and value:
            _flatten_nested(value, key, flat)
        else:
            flat[key] = value
    return flat


def _flatten_nested(dictionary, prefix, flat):
    """Store the leaves of ``dictionary`` in ``flat`` under ``prefix``-dotted keys."""
    # Recursion depth follows nesting depth only, so a wide dictionary
    # cannot exhaust the interpreter's recursion limit.
    for key, value in reversed(list(dictionary.items())):
        flat_key = prefix + "." + key
        if isinstance(value, dict) and value:
            _flatten_nested(value, flat_key, flat)
        else:
            flat[flat_key] = value


def unflatten(dictionary):
    """Turn all keys with format ``{key}.{subkey}`` into nested dictionaries.

    Parameters
    ----------
    dictionary: dict
        Flat dictionary whose string keys use ``.`` as level separator.

    Returns
    -------
    dict
        New nested dictionary; leaf values are the same objects as in the input.

    """
    unflattened_dictionary = {}
    for key, value in dictionary.items():
        *parents, leaf = key.split(".")
        sub_dictionary = unflattened_dictionary
        for part in parents:
            # Create intermediate levels on first use, then descend into them.
            sub_dictionary = sub_dictionary.setdefault(part, {})
        sub_dictionary[leaf] = value
    return unflattened_dictionary


def float_to_digits_list(number):
    """Convert a float into a list of digits, without conserving the exponent.

    The sign, the decimal point and any scientific-notation exponent of
    ``str(number)`` are discarded. Leading and trailing zeros are stripped,
    but at least one digit is kept when there is any.

    Returns
    -------
    list of int
        Significant digits of ``number``; empty when ``str(number)`` contains
        no digit before a possible ``e``.

    """
    # Get rid of the scientific-format exponent
    mantissa = str(number).split("e")[0]

    res = [int(ele) for ele in mantissa if ele.isdigit()]

    # Remove leading 0s
    while len(res) > 1 and res[0] == 0:
        res.pop(0)

    # Remove trailing 0s
    while len(res) > 1 and res[-1] == 0:
        res.pop(-1)

    return res


def tuple_to_dict(data, space):
    """Create a dictionary of parameters from `data`.

    Parameters
    ----------
    data: tuple
        A tuple representing a sample point from `space`.

    space: `orion.algo.space.Space`
        Definition of problem's domain.

    Returns
    -------
    dict
        Maps each dimension's name to the value found at the same position
        in ``data``.

    Raises
    ------
    ValueError
        If ``data`` and ``space`` do not have the same length.

    """
    if len(data) != len(space):
        raise ValueError(
            f"Data point is not compatible with search space:\ndata: {data}\nspace: {space}"
        )

    return {dim.name: value for dim, value in zip(space.values(), data)}
class TestCheckRandomState:
    """Test `sspace.orion.space.check_random_state`"""

    def test_rng_null(self):
        """Test that passing None returns numpy's global `mtrand._rand` state"""
        assert check_random_state(None) is np.random.mtrand._rand

    def test_rng_random_state(self):
        """Test that passing a RandomState returns that same object"""
        rng = np.random.RandomState(1)
        assert check_random_state(rng) is rng

    def test_rng_int(self):
        """Test that passing an int returns a new RandomState"""
        rng = check_random_state(1)
        assert isinstance(rng, np.random.RandomState)
        # Must not be the global state
        assert rng is not np.random.mtrand._rand

    def test_rng_tuple(self):
        """Test that passing a tuple of ints returns a new RandomState"""
        rng = check_random_state((1, 12, 123))
        assert isinstance(rng, np.random.RandomState)
        # Must not be the global state
        assert rng is not np.random.mtrand._rand

    def test_rng_invalid_value(self):
        """Test that passing an unusable seed raises ValueError"""
        with pytest.raises(ValueError) as exc:
            check_random_state("oh_no_oh_no")

        assert "'oh_no_oh_no' cannot be used to seed" in str(exc.value)
"shape=(), default value=None)" + ) + + assert dim.name == "yolo" + assert dim.type == "dimension" + assert dim.shape == () + + def test_shaped_instance(self, seed): + """Use shape keyword argument.""" + dim = Dimension("yolo", "norm", 0.9, shape=(3, 2)) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert_eq(dists.norm.rvs(0.9, size=(3, 2)), samples[0]) + + assert dim.shape == (3, 2) + + dim = Dimension("yolo", "norm", 0.9, shape=4) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert_eq(dists.norm.rvs(0.9, size=4), samples[0]) + + assert dim.shape == (4,) + + def test_ban_size_kwarg(self): + """Should not be able to use 'size' kwarg.""" + with pytest.raises(ValueError): + Dimension("yolo", "norm", 0.9, size=(3, 2)) + + def test_ban_seed_kwarg(self): + """Should not be able to use 'seed' kwarg.""" + with pytest.raises(ValueError): + Dimension("yolo", "norm", 0.9, seed=8) + + def test_ban_rng_kwarg(self): + """Should not be able to use 'random_state' kwarg.""" + with pytest.raises(ValueError): + Dimension("yolo", "norm", 0.9, random_state=8) + + def test_with_predefined_dist(self, seed): + """Use an already defined distribution object as prior arg.""" + dim = Dimension("yolo", dists.norm, 0.9) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert dists.norm.rvs(0.9) == samples[0] + + def test_ban_discrete_kwarg(self): + """Do not allow use for 'discrete' kwarg, because now there's `_Discrete`.""" + with pytest.raises(ValueError) as exc: + Dimension("yolo", "uniform", -3, 4, shape=(4, 4), discrete=True) + assert "pure `_Discrete`" in str(exc.value) + + def test_many_samples(self, seed): + """More than 1.""" + dim = Dimension("yolo", "uniform", -3, 4, shape=(4, 4)) + samples = dim.sample(n_samples=4, seed=seed) + assert len(samples) == 4 + assert_eq(dists.uniform.rvs(-3, 4, size=(4, 4)), samples[0]) + + def test_interval(self): + """Test that bounds on variable.""" + dim = Dimension("yolo", "uniform", -3, 4) + assert 
dim.interval(1.0) == ( + -3.0, + 1.0, + ) # reminder that `scale` is not upper bound + + def test_contains_bounds(self): + """Test __contains__ for bounds.""" + dim = Dimension("yolo", "uniform", -3, 4) + with pytest.raises(NotImplementedError): + assert -3 in dim + + def test_contains_shape(self): + """Test __contains__ for shape check.""" + dim = Dimension(None, "uniform", -3, 4, shape=(4, 4)) + + with pytest.raises(NotImplementedError): + assert dists.uniform.rvs(-3, 4, size=(4, 4)) in dim + + def test_set_bad_name(self): + """Try setting a name other than str or None.""" + dim = Dimension("yolo", "uniform", -3, 4, shape=(4, 4)) + with pytest.raises(TypeError): + dim.name = 4 + + def test_init_with_default_value(self): + """Make sure the __contains__ method does not work""" + with pytest.raises(NotImplementedError): + Dimension("yolo", "uniform", -3, 4, default_value=4) + + def test_no_default_value(self): + """Test that no giving a default value assigns None""" + dim = Dimension("yolo", "uniform", -3, 4) + assert dim.default_value is None + + def test_no_prior(self): + """Test that giving a null prior defaults prior_name to `None`.""" + dim = Dimension("yolo", None) + print(dim._prior_name) + assert dim.prior is None + assert dim._prior_name == "None" + + @pytest.mark.skipif( + sys.version_info < (3, 6), reason="requires python3.6 or higher" + ) + def test_get_prior_string(self): + """Test that prior string can be rebuilt.""" + dim = Dimension("yolo", "alpha", 1, 2, 3, some="args", plus="fluff", n=4) + assert ( + dim.get_prior_string() == "alpha(1, 2, 3, some='args', plus='fluff', n=4)" + ) + + def test_get_prior_string_uniform(self): + """Test special uniform args are handled properly.""" + dim = Dimension("yolo", "uniform", 1, 2) + assert dim.get_prior_string() == "uniform(1, 3)" + + def test_get_prior_string_default_values(self, monkeypatch): + """Test that default_value are included.""" + + def contains(self, value): + return True + + 
monkeypatch.setattr(Dimension, "__contains__", contains) + dim = Dimension("yolo", "alpha", 1, 2, default_value=1) + assert dim.get_prior_string() == "alpha(1, 2, default_value=1)" + + def test_get_prior_string_shape(self): + """Test that shape is included.""" + dim = Dimension("yolo", "alpha", 1, 2, shape=(2, 3)) + assert dim.get_prior_string() == "alpha(1, 2, shape=(2, 3))" + + def test_get_prior_string_loguniform(self): + """Test that special loguniform prior name is replaced properly.""" + dim = Dimension("yolo", "reciprocal", 1e-10, 1) + assert dim.get_prior_string() == "loguniform(1e-10, 1)" + + def test_get_prior_string_normal(self): + """Test that special norm prior name is replaced properly.""" + dim = Dimension("yolo", "norm", 1e-10, 1) + assert dim.get_prior_string() == "normal(1e-10, 1)" + + def test_prior_name(self): + """Test prior name is correct in dimension""" + dim = Dimension("yolo", "reciprocal", 1e-10, 1) + assert dim.prior_name == "reciprocal" + + dim = Dimension("yolo", "norm", 0.9) + assert dim.prior_name == "norm" + + dim = Real("yolo", "uniform", 1, 2) + assert dim.prior_name == "uniform" + + dim = Integer("yolo1", "uniform", -3, 6) + assert dim.prior_name == "int_uniform" + + dim = Integer("yolo1", "norm", -3, 6) + assert dim.prior_name == "int_norm" + + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + dim = Categorical("yolo", categories) + assert dim.prior_name == "choices" + + +class TestReal(object): + """Test methods of a `Real` object.""" + + def test_get_prior_string_precision(self): + """Test that precision is included.""" + dim = Real("yolo", "uniform", 1, 2, precision=5) + assert dim.get_prior_string() == "uniform(1, 3, precision=5)" + + def test_get_prior_string_no_precision(self): + """Test that default precision is not included.""" + dim = Real("yolo", "uniform", 1, 2, precision=4) + assert dim.get_prior_string() == "uniform(1, 3)" + + def test_simple_instance(self, seed): + """Test Real.__init__.""" + dim = 
Real("yolo", "norm", 0.9) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert dists.norm.rvs(0.9) == samples[0] + + assert dists.norm.interval(1.0, 0.9) == dim.interval() + assert dists.norm.interval(0.5, 0.9) == dim.interval(0.5) + + assert 1.0 in dim + + assert ( + str(dim) + == "Real(name=yolo, prior={norm: (0.9,), {}}, shape=(), default value=None)" + ) + assert dim.name == "yolo" + assert dim.type == "real" + assert dim.shape == () + + def test_contains_extra_bounds(self): + """Test __contains__ for the extra bounds.""" + dim = Real("yolo", "norm", 0, 3, low=-3, high=+3) + assert dists.uniform.rvs(-3, 3) in dim + assert -4 not in dim + assert +4 not in dim + assert (1, 2) not in dim + + def test_sample_from_extra_bounds_good(self): + """Randomized test **successful** sampling with the extra bounds.""" + dim = Real("yolo", "norm", 0, 2, low=-5, high=+5, shape=(4, 4)) + for _ in range(8): + samples = dim.sample(8) + for sample in samples: + assert sample in dim + + def test_sample_from_extra_bounds_bad(self): + """Randomized test **unsuccessfully** sampling with the extra bounds.""" + dim = Real("yolo", "norm", 0, 2, low=-2, high=+2, shape=(4, 4)) + with pytest.raises(ValueError) as exc: + dim.sample(8) + assert "Improbable bounds" in str(exc.value) + + def test_bad_bounds(self): + """Try setting bound with high <= low.""" + with pytest.raises(ValueError): + Real("yolo", "norm", 0, 2, low=+2, high=-2, shape=(4, 4)) + with pytest.raises(ValueError): + Real("yolo", "norm", 0, 2, low=+2, high=+2, shape=(4, 4)) + + def test_interval(self): + """Interval takes into account explicitly bounds.""" + dim = Real("yolo", "norm", 0, 3, low=-3, high=+3) + assert dim.interval() == (-3, 3) + + dim = Real("yolo", "alpha", 0.9, low=-3, high=+3) + assert dim.interval() == (0, 3) + + dim = Real("yolo", "uniform", -2, 4, low=-3, high=+3) + assert dim.interval() == (-2.0, 2.0) + + def test_init_with_default_value(self): + """Make sure the default value is set""" + 
dim = Real("yolo", "uniform", -3, 10, default_value=2.0) + + assert type(dim.default_value) is float + + def test_set_outside_bounds_default_value(self): + """Make sure default value is inside the bounds""" + with pytest.raises(ValueError): + Real("yolo", "uniform", -3, 2, default_value=5) + + def test_no_default_value(self): + """Make sure the default value is None""" + dim = Real("yolo", "uniform", -3, 4) + assert dim.default_value is None + + def test_cast_list(self): + """Make sure list are cast to float and returned as list of values""" + dim = Real("yolo", "uniform", -3, 4) + assert dim.cast(["1", "2"]) == [1.0, 2.0] + + def test_cast_array(self): + """Make sure array are cast to float and returned as array of values""" + dim = Real("yolo", "uniform", -3, 4) + assert np.all(dim.cast(np.array(["1", "2"])) == np.array([1.0, 2.0])) + + def test_basic_cardinality(self): + """Brute force test for a simple cardinality use case""" + dim = Real("yolo", "reciprocal", 0.043, 2.3, precision=2) + order_0012 = np.arange(43, 99 + 1) + order_010 = np.arange(10, 99 + 1) + order_23 = np.arange(10, 23 + 1) + assert dim.cardinality == sum(map(len, [order_0012, order_010, order_23])) + + @pytest.mark.parametrize( + "prior_name,min_bound,max_bound,precision,cardinality", + [ + ("uniform", 0, 10, 2, np.inf), + ("reciprocal", 1e-10, 1e-2, None, np.inf), + ("reciprocal", 0.1, 1, 2, 90 + 1), + ("reciprocal", 0.1, 1.2, 2, 90 + 2 + 1), + ("reciprocal", 0.1, 1.25, 2, 90 + 2 + 1), + ("reciprocal", 1e-4, 1e-2, 2, 90 * 2 + 1), + ("reciprocal", 1e-5, 1e-2, 2, 90 + 90 * 2 + 1), + ("reciprocal", 5.234e-3, 1.5908e-2, 2, (90 - 52) + 15 + 1), + ("reciprocal", 5.234e-3, 1.5908e-2, 4, (9 * 10 ** 3 - 5234) + 1590 + 1), + ( + "reciprocal", + 5.234e-5, + 1.5908e-2, + 4, + (9 * 10 ** 3 * 3 - 5234) + 1590 + 1, + ), + ("uniform", 1e-5, 1e-2, 2, np.inf), + ("uniform", -3, 4, 3, np.inf), + ], + ) + def test_cardinality( + self, prior_name, min_bound, max_bound, precision, cardinality + ): + """Check 
whether cardinality is correct""" + dim = Real( + "yolo", prior_name, min_bound, max_bound, precision=precision, shape=None + ) + assert dim.cardinality == cardinality + dim = Real( + "yolo", prior_name, min_bound, max_bound, precision=precision, shape=(2, 3) + ) + assert dim.cardinality == cardinality ** (2 * 3) + + +class TestInteger(object): + """Test methods of a `Integer` object.""" + + def test_simple_instance(self, seed): + """Test Integer.__init__.""" + dim = Integer("yolo", "uniform", -3, 6) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert samples[0] == -2 + + assert dim.interval() == (-3, 3) + assert dim.interval(0.5) == (-2, 2) + + assert 1.0 in dim + + assert ( + str(dim) == "Integer(name=yolo, prior={uniform: (-3, 6), {}}, " + "shape=(), default value=None)" + ) + + assert dim.name == "yolo" + assert dim.type == "integer" + assert dim.shape == () + + def test_inclusive_intervals(self): + """Test that discretized bounds are valid""" + dim = Integer("yolo", "uniform", -3, 5.5) + assert dim.interval() == (-3, 3) + + def test_contains(self): + """Check for integer test.""" + dim = Integer("yolo", "uniform", -3, 6) + + assert 0.1 not in dim + assert (0.1, -0.2) not in dim + assert 0 in dim + assert (1, 2) not in dim + assert 6 not in dim + assert -3 in dim + assert -4 not in dim + + def test_interval_with_infs(self): + """Regression test: Interval handles correctly extreme bounds.""" + dim = Integer("yolo", "poisson", 5) + # XXX: Complete this on both end of interval when scipy bug is fixed + assert dim.interval()[1] == np.inf + + @pytest.mark.xfail(reason="scipy bug") + def test_scipy_integer_dist_interval_bug(self): + """Scipy does not return the correct answer for integer distributions.""" + dim = Integer("yolo", "randint", -3, 6) + assert dim.interval() == (-3, 6) + assert dim.interval(1.0) == (-3, 6) + assert dim.interval(0.9999) == (-3, 6) + + dim = Integer("yolo2", "randint", -2, 4, loc=8) + assert dim.interval() == (6, 12) + + 
def test_init_with_default_value(self): + """Make sure the type of the default value is int""" + dim = Integer("yolo", "uniform", -3, 10, default_value=2) + + assert type(dim.default_value) is int + + def test_set_outside_bounds_default_value(self): + """Make sure the default value is inside the bounds of the dimensions""" + with pytest.raises(ValueError): + Integer("yolo", "uniform", -3, 2, default_value=4) + + def test_no_default_value(self): + """Make sure the default value is None""" + dim = Integer("yolo", "uniform", -3, 4) + assert dim.default_value is None + + def test_cast_borders(self): + """Make sure cast to int returns correct borders""" + dim = Integer("yolo", "uniform", -3, 5) + assert dim.cast(-3.0) == -3 + assert dim.cast(2.0) == 2 + + def test_cast_list(self): + """Make sure list are cast to int and returned as list of values""" + dim = Integer("yolo", "uniform", -3, 5) + assert dim.cast(["1", "2"]) == [1, 2] + + def test_cast_array(self): + """Make sure array are cast to int and returned as array of values""" + dim = Integer("yolo", "uniform", -3, 5) + assert np.all(dim.cast(np.array(["1", "2"])) == np.array([1, 2])) + + def test_get_prior_string_discrete(self): + """Test that discrete is included.""" + dim = Integer("yolo", "uniform", 1, 2) + assert dim.get_prior_string() == "uniform(1, 3, discrete=True)" + + +class TestCategorical(object): + """Test methods of a `Categorical` object.""" + + def test_with_tuple(self, seed): + """Test Categorical.__init__ with a tuple.""" + categories = ("asdfa", 2) + dim = Categorical("yolo", categories) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert samples[0] == "asdfa" + assert dim._probs == (0.5, 0.5) + + assert categories == dim.categories + + assert 2 in dim + assert 3 not in dim + + assert ( + str(dim) == "Categorical(name=yolo, prior={asdfa: 0.50, 2: 0.50}, " + "shape=(), default value=None)" + ) + + assert dim.name == "yolo" + assert dim.type == "categorical" + assert dim.shape == 
() + + def test_with_dict(self, seed): + """Test Categorical.__init__ with a dictionary.""" + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", 2, 3, 4) + dim = Categorical("yolo", OrderedDict(zip(categories, probs))) + samples = dim.sample(seed=seed) + assert len(samples) == 1 + assert samples[0] == 2 + assert dim._probs == probs + + assert categories == dim.categories + + assert 2 in dim + assert 0 not in dim + + assert dim.name == "yolo" + assert dim.type == "categorical" + assert dim.shape == () + + def test_probabilities_are_ok(self, seed): + """Test that the probabilities given are legit using law of big numbers.""" + bins = defaultdict(int) + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", "2", "3", "4") + categories = OrderedDict(zip(categories, probs)) + dim = Categorical("yolo", categories) + for _ in range(500): + sample = dim.sample(seed=seed)[0] + bins[sample] += 1 + for keys in bins.keys(): + bins[keys] /= float(500) + for key, value in categories.items(): + assert abs(bins[key] - value) < 0.01 + + def test_contains_wrong_shape(self): + """Check correct category but wrongly shaped array.""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, 4: 0.4} + dim = Categorical("yolo", categories, shape=2) + + assert 3 not in dim + assert ("asdfa", 2) in dim + + def test_repr_too_many_cats(self): + """Check ellipsis on str/repr of too many categories.""" + categories = tuple(range(10)) + dim = Categorical("yolo", categories, shape=2) + + assert ( + str(dim) == "Categorical(name=yolo, " + "prior={0: 0.10, 1: 0.10, ..., 8: 0.10, 9: 0.10}, " + "shape=(2,), default value=None)" + ) + + def test_bad_probabilities(self): + """User provided bad probabilities.""" + categories = {"asdfa": 0.05, 2: 0.2, 3: 0.3, 4: 0.4} + with pytest.raises(ValueError): + Categorical("yolo", categories, shape=2) + + def test_interval(self): + """Check that `Categorical.interval` returns the tuple of categories.""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, 4: 0.4} + dim = 
Categorical("yolo", categories, shape=2) + + assert dim.interval() == ("asdfa", 2, 3, 4) + + def test_that_objects_types_are_ok(self): + """Check that output samples are of the correct type. + + Don't let numpy mess with their automatic type inference. + """ + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + dim = Categorical("yolo", categories) + + assert "2" not in dim + assert 2 in dim + assert "asdfa" in dim + + dim = Categorical("yolo", categories, shape=(2,)) + + assert ["2", "asdfa"] not in dim + assert [2, "asdfa"] in dim + + def test_init_with_default_value_string(self): + """Make sure the default value is of the correct type""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + dim = Categorical("yolo", categories, default_value="asdfa") + + assert type(dim.default_value) is str + + def test_init_with_default_value_int(self): + """Make sure the default value is of the correct type""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + dim = Categorical("yolo", categories, default_value=2) + + assert type(dim.default_value) is int + + def test_init_with_wrong_default_value(self): + """Make sure the default value exists""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + + with pytest.raises(ValueError): + Categorical("yolo", categories, default_value=2.3) + + def test_no_default_value(self): + """Make sure the default value is None""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + dim = Categorical("yolo", categories) + assert dim.default_value is None + + def test_cast_list(self): + """Make sure list are cast to categories and returned as list""" + categories = {"asdfa": 0.1, 2: 0.2, 3.0: 0.3, "lalala": 0.4} + dim = Categorical("yolo", categories) + assert dim.cast(["asdfa"]) == ["asdfa"] + assert dim.cast(["2"]) == [2] + assert dim.cast(["3.0"]) == [3.0] + + def test_cast_list_multidim(self): + """Make sure array are cast to int and returned as array of values""" + categories = 
list(range(10)) + categories[0] = "asdfa" + categories[2] = "lalala" + dim = Categorical("yolo", categories, shape=2) + sample = ["asdfa", "1"] # np.array(['asdfa', '1'], dtype=object) + assert dim.cast(sample) == ["asdfa", 1] + + def test_cast_array_multidim(self): + """Make sure array are cast to int and returned as array of values""" + categories = list(range(10)) + categories[0] = "asdfa" + categories[2] = "lalala" + dim = Categorical("yolo", categories, shape=2) + sample = np.array(["asdfa", "1"], dtype=object) + assert np.all(dim.cast(sample) == np.array(["asdfa", 1], dtype=object)) + + def test_cast_bad_category(self): + """Make sure casting a value that is not a category raises ValueError""" + categories = list(range(10)) + dim = Categorical("yolo", categories, shape=2) + sample = np.array(["asdfa", "1"], dtype=object) + with pytest.raises(ValueError) as exc: + dim.cast(sample) + assert "Invalid category: asdfa" in str(exc.value) + + def test_get_prior_string_list(self): + """Test that prior string can be rebuilt with list of choices.""" + categories = list(range(10)) + categories[0] = "asdfa" + categories[2] = "lalala" + dim = Categorical( + "yolo", categories, shape=2, default_value=["asdfa", "lalala"] + ) + assert dim.get_prior_string() == ( + "choices(['asdfa', 1, 'lalala', 3, 4, 5, 6, 7, 8, 9], " + "shape=2, default_value=['asdfa', 'lalala'])" + ) + + def test_get_prior_string_dict(self): + """Test that prior string can be rebuilt with dict of choices.""" + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, "lalala": 0.4} + dim = Categorical( + "yolo", categories, shape=2, default_value=["asdfa", "lalala"] + ) + assert dim.get_prior_string() == ( + "choices({'asdfa': 0.10, 2: 0.20, 3: 0.30, 'lalala': 0.40}, " + "shape=2, default_value=['asdfa', 'lalala'])" + ) + + +class TestFidelity(object): + """Test methods of a Fidelity object.""" + + def test_simple_instance(self): + """Test Fidelity.__init__.""" + dim = Fidelity("epoch", 1, 2) + + assert 
str(dim) == "Fidelity(name=epoch, low=1, high=2, base=2)" + assert dim.low == 1 + assert dim.high == 2 + assert dim.base == 2 + assert dim.name == "epoch" + assert dim.type == "fidelity" + assert dim.shape is None + + def test_fidelity_omit_base(self): + """Test that default base is not included.""" + dim = Fidelity("epoch", 1, 2, base=2) + assert dim.get_prior_string() == "fidelity(1, 2)" + + def test_fidelity_set_base(self): + """Test that base is included.""" + dim = Fidelity("epoch", 1, 2, base=3) + assert dim.get_prior_string() == "fidelity(1, 2, base=3)" + + def test_min_resources(self): + """Test that an error is raised if min is smaller than 1""" + with pytest.raises(AttributeError) as exc: + Fidelity("epoch", 0, 2) + assert "Minimum resources must be a positive number." == str(exc.value) + + def test_min_max_resources(self): + """Test that an error is raised if min is larger than max""" + with pytest.raises(AttributeError) as exc: + Fidelity("epoch", 3, 2) + assert "Minimum resources must be smaller than maximum resources." 
== str( + exc.value + ) + + def test_base(self): + """Test that an error is raised if base is smaller than 1""" + with pytest.raises(AttributeError) as exc: + Fidelity("epoch", 1, 2, 0) + assert "Base should be greater than or equal to 1" == str(exc.value) + + def test_sampling(self): + """Make sure Fidelity simply returns `high`""" + dim = Fidelity("epoch", 1, 2) + assert dim.sample() == [2] + dim = Fidelity("epoch", 1, 5) + assert dim.sample() == [5] + dim = Fidelity("epoch", 1, 5) + assert dim.sample(4) == [5] * 4 + + def test_default_value(self): + """Make sure Fidelity simply returns `high`""" + dim = Fidelity("epoch", 1, 2) + assert dim.default_value == 2 + dim = Fidelity("epoch", 1, 5) + assert dim.default_value == 5 + + def test_contains(self): + """Make sure fidelity.__contains__ tests based on (min, max)""" + dim = Fidelity("epoch", 1, 10) + + assert 0 not in dim + assert 1 in dim + assert 5 in dim + assert 10 in dim + assert 20 not in dim + + def test_interval(self): + """Check that interval() is (min, max).""" + dim = Fidelity("epoch", 1, 10) + assert dim.interval() == (1, 10) + + def test_cast(self): + """Check that error is being raised.""" + dim = Fidelity("epoch", 1, 10) + with pytest.raises(NotImplementedError): + dim.cast() + + +class TestSpace(object): + """Test methods of a `Space` object.""" + + def test_init(self): + """Instantiate space, must be a dictionary.""" + space = Space() + assert isinstance(space, dict) + + def test_register_and_contain(self): + """Register bunch of dimensions, check if points/name are in space.""" + space = Space() + + trial = Trial(params=[{"name": "no", "value": 0, "type": "integer"}]) + + assert "yolo" not in space + assert trial not in space + + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, 4: 0.4} + dim = Categorical("yolo", categories, shape=2) + space.register(dim) + dim = Integer("yolo2", "uniform", -3, 6) + space.register(dim) + dim = Real("yolo3", "norm", 0.9) + space.register(dim) + + assert "yolo" in space + 
assert "yolo2" in space + assert "yolo3" in space + + assert format_trials.tuple_to_trial((("asdfa", 2), 0, 3.5), space) in space + assert format_trials.tuple_to_trial((("asdfa", 2), 7, 3.5), space) not in space + + def test_hierarchical_register_and_contain(self): + """Register hierarchical dimensions and check if points/name are in space.""" + space = Space() + + categories = {"asdfa": 0.1, 2: 0.2, 3: 0.3, 4: 0.4} + dim = Categorical("yolo.nested", categories, shape=2) + space.register(dim) + dim = Integer("yolo2.nested", "uniform", -3, 6) + space.register(dim) + dim = Real("yolo3", "norm", 0.9) + space.register(dim) + + trial = Trial( + params=[ + {"name": "yolo.nested", "value": ["asdfa", 2], "type": "categorical"}, + {"name": "yolo2.nested", "value": 1, "type": "integer"}, + {"name": "yolo3", "value": 0.5, "type": "real"}, + ] + ) + + assert "yolo" in trial.params + assert "nested" in trial.params["yolo"] + assert "yolo2" in trial.params + assert "nested" in trial.params["yolo2"] + assert "yolo3" in trial.params + + assert trial in space + + def test_sample(self): + """Check whether sampling works correctly.""" + seed = 5 + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", 2, 3, 4) + dim1 = Categorical("yolo", OrderedDict(zip(categories, probs)), shape=(2, 2)) + space.register(dim1) + dim2 = Integer("yolo2", "uniform", -3, 6) + space.register(dim2) + dim3 = Real("yolo3", "norm", 0.9) + space.register(dim3) + + point = space.sample(seed=seed) + rng = check_random_state(seed) + test_point = [ + dict( + yolo=dim1.sample(seed=rng)[0], + yolo2=dim2.sample(seed=rng)[0], + yolo3=dim3.sample(seed=rng)[0], + ) + ] + assert len(point) == len(test_point) == 1 + assert len(point[0].params) == len(test_point[0]) == 3 + assert np.all(point[0].params["yolo"] == test_point[0]["yolo"]) + assert point[0].params["yolo2"] == test_point[0]["yolo2"] + assert point[0].params["yolo3"] == test_point[0]["yolo3"] + + points = space.sample(2, seed=seed) + rng = 
check_random_state(seed) + points1 = dim1.sample(2, seed=rng) + points2 = dim2.sample(2, seed=rng) + points3 = dim3.sample(2, seed=rng) + test_points = [ + dict(yolo=points1[0], yolo2=points2[0], yolo3=points3[0]), + dict(yolo=points1[1], yolo2=points2[1], yolo3=points3[1]), + ] + assert len(points) == len(test_points) == 2 + for i in range(2): + assert len(points[i].params) == len(test_points[i]) == 3 + assert np.all(points[i].params["yolo"] == test_points[i]["yolo"]) + assert points[i].params["yolo2"] == test_points[i]["yolo2"] + assert points[i].params["yolo3"] == test_points[i]["yolo3"] + + def test_interval(self): + """Check whether interval is cool.""" + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", 2, 3, 4) + dim = Categorical("yolo", OrderedDict(zip(categories, probs)), shape=2) + space.register(dim) + dim = Integer("yolo2", "uniform", -3, 6) + space.register(dim) + dim = Real("yolo3", "norm", 0.9) + space.register(dim) + + assert space.interval() == [categories, (-3, 3), (-np.inf, np.inf)] + + def test_cardinality(self): + """Check whether space capacity is correct""" + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", 2, 3, 4) + dim = Categorical("yolo", OrderedDict(zip(categories, probs)), shape=2) + space.register(dim) + dim = Integer("yolo2", "uniform", -3, 6) + space.register(dim) + dim = Fidelity("epoch", 1, 9, 3) + space.register(dim) + + assert space.cardinality == (4 ** 2) * (6 + 1) * 1 + + dim = Integer("yolo3", "uniform", -3, 2, shape=(3, 2)) + space.register(dim) + assert space.cardinality == (4 ** 2) * (6 + 1) * 1 * ((2 + 1) ** (3 * 2)) + + dim = Real("yolo4", "norm", 0.9) + space.register(dim) + assert np.inf == space.cardinality + + def test_bad_setitem(self): + """Check exceptions in setting items in Space.""" + space = Space() + + # The name of an integer must be a of `str` type. + # Integers are reversed for indexing the OrderedDict. 
+ with pytest.raises(TypeError) as exc: + space[5] = Integer("yolo", "uniform", -3, 6) + assert "string" in str(exc.value) + + # Only object of type `Dimension` are allowed in `Space`. + with pytest.raises(TypeError) as exc: + space["ispis"] = "nope" + assert "Dimension" in str(exc.value) + + # Cannot register something with the same name. + space.register(Integer("yolo", "uniform", -3, 6)) + with pytest.raises(ValueError) as exc: + space.register(Real("yolo", "uniform", 0, 6)) + assert "another name" in str(exc.value) + + def test_getitem(self): + """Test getting dimensions from space.""" + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", 2, 3, 4) + dim = Categorical("yolo", OrderedDict(zip(categories, probs)), shape=2) + space.register(dim) + dim = Integer("yolo2", "uniform", -3, 6) + space.register(dim) + dim = Real("yolo3", "norm", 0.9) + space.register(dim) + + assert space["yolo"].type == "categorical" + assert space[0].type == "categorical" + + with pytest.raises(KeyError): + space["asdf"] + + with pytest.raises(IndexError): + space[3] + + def test_order(self): + """Test that the same space built twice will have the same ordering.""" + space1 = Space() + space1.register(Integer("yolo1", "uniform", -3, 6, shape=(2,))) + space1.register(Integer("yolo2", "uniform", -3, 6, shape=(2,))) + space1.register(Real("yolo3", "norm", 0.9)) + space1.register(Categorical("yolo4", ("asdfa", 2))) + + space2 = Space() + space2.register(Integer("yolo1", "uniform", -3, 6, shape=(2,))) + space2.register(Real("yolo3", "norm", 0.9)) + space2.register(Categorical("yolo4", ("asdfa", 2))) + space2.register(Integer("yolo2", "uniform", -3, 6, shape=(2,))) + + assert list(space1) == list(space1.keys()) + assert list(space2) == list(space2.keys()) + assert list(space1.values()) == list(space2.values()) + assert list(space1.items()) == list(space2.items()) + assert list(space1.keys()) == list(space2.keys()) + assert list(space1.values()) == list(space2.values()) + 
assert list(space1.items()) == list(space2.items()) + + def test_repr(self): + """Test str/repr.""" + space = Space() + dim = Integer("yolo2", "uniform", -3, 6, shape=(2,)) + space.register(dim) + dim = Real("yolo3", "norm", 0.9) + space.register(dim) + + assert ( + str(space) == "Space([" + "Integer(name=yolo2, prior={uniform: (-3, 6), {}}, shape=(2,), " + "default value=None),\n" + " Real(name=yolo3, prior={norm: (0.9,), {}}, shape=(), " + "default value=None)])" + ) + + def test_configuration(self): + """Test that configuration contains all dimensions.""" + space = Space() + space.register(Integer("yolo1", "uniform", -3, 6, shape=(2,))) + space.register(Integer("yolo2", "uniform", -3, 6, shape=(2,))) + space.register(Real("yolo3", "norm", 0.9)) + space.register(Categorical("yolo4", ("asdfa", 2))) + + assert space.configuration == { + "yolo1": "uniform(-3, 3, shape=(2,), discrete=True)", + "yolo2": "uniform(-3, 3, shape=(2,), discrete=True)", + "yolo3": "normal(0.9)", + "yolo4": "choices(['asdfa', 2])", + } + + def test_precision(self): + """Test that precision is correctly handled.""" + space = Space() + space.register(Real("yolo1", "norm", 0.9, precision=6)) + space.register(Real("yolo2", "norm", 0.9, precision=None)) + space.register(Real("yolo5", "norm", 0.9)) + + assert space["yolo1"].precision == 6 + assert space["yolo2"].precision is None + assert space["yolo5"].precision == 4 + + with pytest.raises(TypeError): + space.register(Real("yolo3", "norm", 0.9, precision=-12)) + + with pytest.raises(TypeError): + space.register(Real("yolo4", "norm", 0.9, precision=0.6)) diff --git a/tests/orion/test_space_builder.py b/tests/orion/test_space_builder.py new file mode 100644 index 0000000..545723f --- /dev/null +++ b/tests/orion/test_space_builder.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Example usage and tests for :mod:`orion.core.io.space_builder`.""" +import pytest +from scipy.stats import distributions as dists + +from 
sspace.orion.space import Categorical, Fidelity, Integer, Real +from sspace.orion.builder import DimensionBuilder, SpaceBuilder + + +@pytest.fixture(scope="module") +def dimbuilder(): + """Return a `DimensionBuilder` instance.""" + return DimensionBuilder() + + +@pytest.fixture(scope="module") +def spacebuilder(): + """Return a `SpaceBuilder` instance.""" + return SpaceBuilder() + + +class TestDimensionBuilder(object): + """Ways of Dimensions builder.""" + + def test_build_loguniform(self, dimbuilder): + """Check that loguniform is built into reciprocal correctly.""" + dim = dimbuilder.build("yolo", "loguniform(0.001, 10)") + assert isinstance(dim, Real) + assert dim.name == "yolo" + assert dim._prior_name == "reciprocal" + assert 3.3 in dim and 11.1 not in dim + assert isinstance(dim.prior, dists.reciprocal_gen) + + dim = dimbuilder.build("yolo2", "loguniform(1, 1000, discrete=True)") + assert isinstance(dim, Integer) + assert dim.name == "yolo2" + assert dim._prior_name == "reciprocal" + assert 3 in dim and 0 not in dim and 3.3 not in dim + assert isinstance(dim.prior, dists.reciprocal_gen) + + def test_eval_nonono(self, dimbuilder): + """Make malevolent/naive eval access more difficult. I think.""" + with pytest.raises(RuntimeError): + dimbuilder.build("la", "__class__") + + def test_build_a_good_real(self, dimbuilder): + """Check that non registered names are good, as long as they are in + `scipy.stats.distributions`. + """ + dim = dimbuilder.build("yolo2", "alpha(0.9, low=0, high=10, shape=2)") + assert isinstance(dim, Real) + assert dim.name == "yolo2" + assert dim._prior_name == "alpha" + assert 3.3 not in dim + assert (3.3, 11.1) not in dim + assert (3.3, 6) in dim + assert isinstance(dim.prior, dists.alpha_gen) + + def test_build_a_good_integer(self, dimbuilder): + """Check that non registered names are good, as long as they are in + `scipy.stats.distributions`. 
+ """ + dim = dimbuilder.build("yolo3", "poisson(5)") + assert isinstance(dim, Integer) + assert dim.name == "yolo3" + assert dim._prior_name == "poisson" + assert isinstance(dim.prior, dists.poisson_gen) + + def test_build_a_good_real_discrete(self, dimbuilder): + """Check that non registered names are good, as long as they are in + `scipy.stats.distributions`. + """ + dim = dimbuilder.build("yolo3", "alpha(1.1, discrete=True)") + assert isinstance(dim, Integer) + assert dim.name == "yolo3" + assert dim._prior_name == "alpha" + assert isinstance(dim.prior, dists.alpha_gen) + + def test_build_a_good_fidelity(self, dimbuilder): + """Check that a Fidelity dimension is correctly built.""" + dim = dimbuilder.build("epoch", "fidelity(1, 16, 4)") + assert isinstance(dim, Fidelity) + assert dim.name == "epoch" + assert dim.low == 1 + assert dim.high == 16 + assert dim.base == 4 + assert dim._prior_name == "None" + assert dim.prior is None + + def test_build_fidelity_default_base(self, dimbuilder): + """Check that a Fidelity dimension is correctly built with default base.""" + dim = dimbuilder.build("epoch", "fidelity(1, 16)") + assert isinstance(dim, Fidelity) + assert dim.name == "epoch" + assert dim.low == 1 + assert dim.high == 16 + assert dim.base == 2 + assert dim._prior_name == "None" + assert dim.prior is None + + def test_build_fails_because_of_name(self, dimbuilder): + """Build fails because distribution name is not supported...""" + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo3", "lalala(1.1, discrete=True)") + assert "Parameter" in str(exc.value) + assert "supported" in str(exc.value) + + def test_build_fails_because_of_unexpected_args(self, dimbuilder): + """Build fails because argument is not supported...""" + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo3", "alpha(1.1, whatisthis=5, discrete=True)") + assert "Parameter" in str(exc.value) + assert "unexpected" in str(exc.value.__cause__) + + def 
test_build_fails_because_of_ValueError_on_run(self, dimbuilder): + """Build fails because ValueError happens on init.""" + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo2", "alpha(0.9, low=5, high=6, shape=2)") + assert "Parameter" in str(exc.value) + assert "Improbable bounds" in str(exc.value.__cause__) + + def test_build_fails_because_of_ValueError_on_init(self, dimbuilder): + """Build fails because ValueError happens on init.""" + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo2", "alpha(0.9, low=4, high=10, size=2)") + assert "Parameter" in str(exc.value) + assert "size" in str(exc.value.__cause__) + + def test_build_gaussian(self, dimbuilder): + """Check that gaussian/normal/norm is built into reciprocal correctly.""" + dim = dimbuilder.build("yolo", "gaussian(3, 5)") + assert isinstance(dim, Real) + assert dim.name == "yolo" + assert dim._prior_name == "norm" + assert isinstance(dim.prior, dists.norm_gen) + + dim = dimbuilder.build("yolo2", "gaussian(1, 0.5, discrete=True)") + assert isinstance(dim, Integer) + assert dim.name == "yolo2" + assert dim._prior_name == "norm" + assert isinstance(dim.prior, dists.norm_gen) + + def test_build_normal(self, dimbuilder): + """Check that gaussian/normal/norm is built into reciprocal correctly.""" + dim = dimbuilder.build("yolo", "normal(0.001, 10)") + assert isinstance(dim, Real) + assert dim.name == "yolo" + assert dim._prior_name == "norm" + assert isinstance(dim.prior, dists.norm_gen) + + dim = dimbuilder.build("yolo2", "normal(1, 0.5, discrete=True)") + assert isinstance(dim, Integer) + assert dim.name == "yolo2" + assert dim._prior_name == "norm" + assert isinstance(dim.prior, dists.norm_gen) + + def test_build_choices(self, dimbuilder): + """Create correctly a `Categorical` dimension.""" + dim = dimbuilder.build("yolo", "choices('adfa', 1, 0.3, 'asaga', shape=4)") + assert isinstance(dim, Categorical) + assert dim.name == "yolo" + assert dim._prior_name == "Distribution" + assert 
isinstance(dim.prior, dists.rv_discrete) + + dim = dimbuilder.build("yolo", "choices(['adfa', 1])") + assert isinstance(dim, Categorical) + assert dim.name == "yolo" + assert dim._prior_name == "Distribution" + assert isinstance(dim.prior, dists.rv_discrete) + + dim = dimbuilder.build("yolo2", "choices({'adfa': 0.1, 3: 0.4, 5: 0.5})") + assert isinstance(dim, Categorical) + assert dim.name == "yolo2" + assert dim._prior_name == "Distribution" + assert isinstance(dim.prior, dists.rv_discrete) + + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo2", "choices({'adfa': 0.1, 3: 0.4})") + assert "Parameter" in str(exc.value) + assert "sum" in str(exc.value.__cause__) + + def test_build_fails_because_empty_args(self, dimbuilder): + """What happens if somebody 'forgets' stuff?""" + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo", "choices()") + assert "Parameter" in str(exc.value) + assert "categories" in str(exc.value) + + with pytest.raises(TypeError) as exc: + dimbuilder.build("what", "alpha()") + assert "Parameter" in str(exc.value) + assert "positional" in str(exc.value.__cause__) + + def test_build_fails_because_troll(self, dimbuilder): + """What happens if somebody does not fit regular expression expected?""" + with pytest.raises(TypeError) as exc: + dimbuilder.build("yolo", "lalalala") + assert "Parameter" in str(exc.value) + assert "form for prior" in str(exc.value) + + +class TestSpaceBuilder(object): + """Check whether space definition from various input format is successful.""" + + def test_configuration_rebuild(self, spacebuilder): + """Test that configuration can be used to recreate a space.""" + prior = { + "x": "uniform(0, 10, discrete=True)", + "y": "loguniform(1e-08, 1)", + "z": "choices(['voici', 'voila', 2])", + } + space = spacebuilder.build(prior) + assert space.configuration == prior + + def test_subdict_dimensions(self, spacebuilder): + """Test space can have hierarchical structure.""" + prior = { + "a": {"x": 
"uniform(0, 10, discrete=True)"}, + "b": {"y": "loguniform(1e-08, 1)", "z": "choices(['voici', 'voila', 2])"}, + } + space = spacebuilder.build(prior) + assert len(space) == 3 + assert "a.x" in space + assert "b.y" in space + assert "b.z" in space diff --git a/tests/orion/test_transformer.py b/tests/orion/test_transformer.py new file mode 100644 index 0000000..0669b27 --- /dev/null +++ b/tests/orion/test_transformer.py @@ -0,0 +1,1435 @@ +# -*: utf-8 -*- +"""Collection of tests for :mod:`orion.core.worker.transformer`.""" +import copy +import itertools +from collections import OrderedDict + +import numpy +import pytest + +from sspace.orion.space import Categorical, Dimension, Integer, Real, Space +from sspace.orion.utils import tuple_to_dict +from sspace.orion.transformer import ( + Compose, + Enumerate, + Identity, + Linearize, + OneHotEncode, + Precision, + Quantize, + ReshapedDimension, + ReshapedSpace, + Reverse, + TransformedDimension, + TransformedSpace, + View, + build_required_space, + change_trial_params, +) + + +from sspace.orion.legacy import Param, Trial, format_trials + + +class TestIdentity(object): + """Test subclasses of `Identity` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Identity() + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Identity() + assert t.domain_type is None + assert t.target_type is None + + t = Identity("mpogias") + assert t.domain_type == "mpogias" + assert t.target_type == "mpogias" + + def test_transform(self): + """Check if it transforms properly.""" + t = Identity() + assert t.transform("yo") == "yo" + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Identity() + assert t.reverse("yo") == "yo" + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Identity() + assert t.infer_target_shape((5,)) == (5,) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Identity() + assert t.repr_format("asfa") == "asfa" + + +class TestReverse(object): + """Test subclasses of `Reverse` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Reverse(Quantize()) + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Reverse(Quantize()) + assert t.domain_type == "integer" + assert t.target_type == "real" + + def test_transform(self): + """Check if it transforms properly.""" + t = Reverse(Quantize()) + assert t.transform(9) == 9.0 + assert t.transform(5) == 5.0 + assert numpy.all(t.transform([9, 5]) == numpy.array([9.0, 5.0], dtype=float)) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Reverse(Quantize()) + assert t.reverse(8.6) == 9 + assert t.reverse(8.4) == 8 + assert t.reverse(5.3) == 5 + assert numpy.all(t.reverse([8.6, 5.3]) == numpy.array([9, 5], dtype=int)) + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Reverse(Quantize()) + assert t.infer_target_shape((5,)) == (5,) + + def test_no_reverse_one_hot_encode(self): + """Do NOT support real to categorical.""" + with pytest.raises(AssertionError): + Reverse(OneHotEncode([1, 2, 3])) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Reverse(Quantize()) + assert t.repr_format("asfa") == "ReverseQuantize(asfa)" + + +class TestCompose(object): + """Test subclasses of `Compose` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Compose([Enumerate([2, "asfa", "ipsi"]), OneHotEncode(3)], "categorical") + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Compose([]) + assert t.domain_type is None + assert t.target_type is None + + t = Compose([], "real") + assert t.domain_type == "real" + assert t.target_type == "real" + + t = Compose([Quantize()], "real") + assert t.domain_type == "real" + assert t.target_type == "integer" + + t = Compose([Enumerate([2, "asfa", "ipsi"]), OneHotEncode(3)], "categorical") + assert t.domain_type == "categorical" + assert t.target_type == "real" + + def test_transform(self): + """Check if it transforms properly.""" + t = Compose([Enumerate([2, "asfa", "ipsi"]), OneHotEncode(3)], "categorical") + assert numpy.all(t.transform(2) == numpy.array((1.0, 0.0, 0.0))) + assert numpy.all(t.transform("asfa") == numpy.array((0.0, 1.0, 0.0))) + assert numpy.all(t.transform("ipsi") == numpy.array((0.0, 0.0, 1.0))) + with pytest.raises(KeyError): + t.transform("aafdasfa") + assert numpy.all( + t.transform([["ipsi", "asfa"], [2, "ipsi"]]) + == numpy.array( + [[(0.0, 0.0, 1.0), (0.0, 1.0, 0.0)], [(1.0, 0.0, 0.0), (0.0, 0.0, 1.0)]] + ) + ) + + t = Compose([Enumerate([2, "asfa"]), OneHotEncode(2)], "categorical") + assert t.transform(2) == 0.0 + assert t.transform("asfa") == 1.0 + with pytest.raises(KeyError): + t.transform("ipsi") + assert numpy.all( + t.transform([["asfa", "asfa"], [2, "asfa"]]) + == numpy.array([[1.0, 1.0], [0.0, 1.0]]) + ) + + # for the crazy enough + t = Compose([Enumerate([2]), OneHotEncode(1)], "categorical") + assert t.transform(2) == 0.0 + with pytest.raises(KeyError): + t.transform("ipsi") + assert numpy.all(t.transform([[2, 2], [2, 2]]) == [[0, 0], [0, 0]]) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Compose([Enumerate([2, "asfa", "ipsi"]), OneHotEncode(3)], "categorical") + assert t.reverse((0.9, 0.8, 0.3)) == 2 + assert t.reverse((-0.3, 2.0, 0.0)) == "asfa" + assert t.reverse((0.0, 0.0, 1.0)) == "ipsi" + with pytest.raises(AssertionError): + t.reverse((0.0, 0.0, 0.0, 1.0)) + assert numpy.all( + t.reverse( + 
numpy.array( + [ + [(0.0, 0.0, 1.0), (0.0, 1.0, 0.0)], + [(1.0, 0.0, 0.0), (0.0, 0.0, 1.0)], + ] + ) + ) + == numpy.array([["ipsi", "asfa"], [2, "ipsi"]], dtype=numpy.object) + ) + + t = Compose([Enumerate([2, "asfa"]), OneHotEncode(2)], "categorical") + assert t.reverse(0.3) == 2 + assert t.reverse(2.0) == "asfa" + assert numpy.all( + t.reverse((0.0, 0.0, 0.0, 1.0)) + == numpy.array([2, 2, 2, "asfa"], dtype=numpy.object) + ) + assert numpy.all( + t.reverse(numpy.array([[0.55, 3.0], [-0.6, 1.0]])) + == numpy.array([["asfa", "asfa"], [2, "asfa"]], dtype=numpy.object) + ) + + # for the crazy enough + t = Compose([Enumerate([2]), OneHotEncode(1)], "categorical") + assert t.reverse(0) == 2 + assert t.reverse(5.0) == 2 + assert t.reverse(0.2) == 2 + assert t.reverse(-0.2) == 2 + assert numpy.all( + t.reverse([[0.5, 0], [1.0, 55]]) + == numpy.array([[2, 2], [2, 2]], dtype=numpy.object) + ) + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Compose([Enumerate([2, "asfa", "ipsi"]), OneHotEncode(3)], "categorical") + assert t.infer_target_shape((2, 5)) == (2, 5, 3) + + t = Compose([Enumerate([2, "asfa"]), OneHotEncode(2)], "categorical") + assert t.infer_target_shape((2, 5)) == (2, 5) + + t = Compose([Enumerate([2]), OneHotEncode(1)], "categorical") + assert t.infer_target_shape((2, 5)) == (2, 5) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Compose([Enumerate([2, "asfa", "ipsi"]), OneHotEncode(3)], "categorical") + assert t.repr_format("asfa") == "OneHotEncode(Enumerate(asfa))" + + +class TestPrecision(object): + """Test subclasses of `Precision` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Precision() + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Precision() + assert t.domain_type == "real" + assert t.target_type == "real" + + def test_transform(self): + """Check if it transforms properly.""" + t = Precision(precision=4) + assert t.transform(8.654321098) == 8.654 + assert t.transform(0.000123456789) == 0.0001235 + assert numpy.all( + t.transform([8.654321098, 0.000123456789]) + == numpy.array([8.654, 0.0001235], dtype=float) + ) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Precision() + assert t.reverse(9.0) == 9.0 + assert t.reverse(5.0) == 5.0 + assert numpy.all(t.reverse([9.0, 5.0]) == numpy.array([9.0, 5.0], dtype=float)) + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Precision() + assert t.infer_target_shape((5,)) == (5,) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Precision() + assert t.repr_format("asfa") == "Precision(4, asfa)" + + +class TestQuantize(object): + """Test subclasses of `Quantize` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Quantize() + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Quantize() + assert t.domain_type == "real" + assert t.target_type == "integer" + + def test_transform(self): + """Check if it transforms properly.""" + t = Quantize() + assert t.transform(8.6) == 9 + assert t.transform(8.4) == 8 + assert t.transform(5.3) == 5 + assert numpy.all(t.transform([8.6, 5.3]) == numpy.array([9, 5], dtype=int)) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Quantize() + assert t.reverse(9) == 9.0 + assert t.reverse(5) == 5.0 + assert numpy.all(t.reverse([9, 5]) == numpy.array([9.0, 5.0], dtype=float)) + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Quantize() + assert t.infer_target_shape((5,)) == (5,) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Quantize() + assert t.repr_format("asfa") == "Quantize(asfa)" + + +class TestEnumerate(object): + """Test subclasses of `Enumerate` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = Enumerate([2, "asfa", "ipsi"]) + # Copy won't fail if vectorized function is not called at least once. + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Enumerate([2, "asfa", "ipsi"]) + assert t.domain_type == "categorical" + assert t.target_type == "integer" + + def test_transform(self): + """Check if it transforms properly.""" + t = Enumerate([2, "asfa", "ipsi"]) + assert t.transform(2) == 0 + assert t.transform("asfa") == 1 + assert t.transform("ipsi") == 2 + with pytest.raises(KeyError): + t.transform("aafdasfa") + assert numpy.all( + t.transform([["ipsi", "asfa"], [2, "ipsi"]]) == [[2, 1], [0, 2]] + ) + + # for the crazy enough + t = Enumerate([2]) + assert t.transform(2) == 0 + with pytest.raises(KeyError): + t.transform("aafdasfa") + assert numpy.all(t.transform([[2, 2], [2, 2]]) == [[0, 0], [0, 0]]) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = Enumerate([2, "asfa", "ipsi"]) + assert t.reverse(0) == 2 + assert t.reverse(1) == "asfa" + assert t.reverse(2) == "ipsi" + with pytest.raises(IndexError): + t.reverse(3) + assert numpy.all( + t.reverse([[2, 1], [0, 2]]) + == numpy.array([["ipsi", "asfa"], [2, "ipsi"]], dtype=numpy.object) + ) + + # for the crazy enough + t = Enumerate([2]) + assert t.reverse(0) == 2 + with pytest.raises(IndexError): + t.reverse(1) + assert numpy.all( + t.reverse([[0, 0], [0, 0]]) + == numpy.array([[2, 2], [2, 2]], dtype=numpy.object) + ) + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = Enumerate([2, "asfa", "ipsi"]) + assert t.infer_target_shape((5,)) == (5,) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Enumerate([2, "asfa", "ipsi"]) + assert t.repr_format("asfa") == "Enumerate(asfa)" + + +class TestOneHotEncode(object): + """Test subclasses of `OneHotEncode` transformation.""" + + def test_deepcopy(self): + """Verify that the transformation object can be copied""" + t = OneHotEncode(3) + t.transform([2]) + copy.deepcopy(t) + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` 
and `target_type` do + what's expected. + """ + t = OneHotEncode(3) + assert t.domain_type == "integer" + assert t.target_type == "real" + + def test_transform(self): + """Check if it transforms properly.""" + t = OneHotEncode(3) + assert numpy.all(t.transform(0) == numpy.array((1.0, 0.0, 0.0))) + assert numpy.all(t.transform(1) == numpy.array((0.0, 1.0, 0.0))) + assert numpy.all(t.transform(2) == numpy.array((0.0, 0.0, 1.0))) + with pytest.raises(AssertionError): + t.transform(4) + with pytest.raises(AssertionError): + t.transform(-1) + with pytest.raises(AssertionError): + t.transform(2.2) + assert numpy.all( + t.transform([[2, 1], [0, 2]]) + == numpy.array( + [[(0.0, 0.0, 1.0), (0.0, 1.0, 0.0)], [(1.0, 0.0, 0.0), (0.0, 0.0, 1.0)]] + ) + ) + + t = OneHotEncode(2) + assert t.transform(0) == 0.0 + assert t.transform(1) == 1.0 + with pytest.raises(TypeError): + t.transform("ipsi") + assert numpy.all( + t.transform([[1, 1], [0, 1]]) == numpy.array([[1.0, 1.0], [0.0, 1.0]]) + ) + + # for the crazy enough + t = OneHotEncode(1) + assert t.transform(0) == 0.0 + with pytest.raises(TypeError): + t.transform("ipsi") + assert numpy.all(t.transform([[0, 0], [0, 0]]) == [[0.0, 0.0], [0.0, 0.0]]) + + def test_reverse(self): + """Check if it reverses `transform` properly, if possible.""" + t = OneHotEncode(3) + assert t.reverse((0.9, 0.8, 0.3)) == 0 + assert t.reverse((-0.3, 2.0, 0.0)) == 1 + assert t.reverse((0.0, 0.0, 1.0)) == 2 + with pytest.raises(AssertionError): + t.reverse((0.0, 0.0, 0.0, 1.0)) + assert numpy.all( + t.reverse( + numpy.array( + [ + [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]], + [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], + ] + ) + ) + == numpy.array([[2, 1], [0, 2]], dtype=int) + ) + + t = OneHotEncode(2) + assert t.reverse(0.3) == 0 + assert t.reverse(2.0) == 1 + assert numpy.all( + t.reverse((0.0, 0.0, 0.0, 1.0)) == numpy.array([0, 0, 0, 1], dtype=int) + ) + assert numpy.all( + t.reverse(numpy.array([[0.55, 3.0], [-0.6, 1.0]])) + == numpy.array([[1, 1], [0, 1]], 
dtype=int) + ) + + # for the crazy enough + t = OneHotEncode(1) + assert t.reverse(0) == 0 + assert t.reverse(5.0) == 0 + assert t.reverse(0.2) == 0 + assert t.reverse(-0.2) == 0 + assert numpy.all( + t.reverse([[0.5, 0], [1.0, 55]]) == numpy.array([[0, 0], [0, 0]], dtype=int) + ) + + def test_interval(self): + """Test that the onehot interval has the proper dimensions""" + t = OneHotEncode(3) + low, high = t.interval() + assert (low == numpy.zeros(3)).all() + assert (high == numpy.ones(3)).all() + + t = OneHotEncode(2) + low, high = t.interval() + assert (low == numpy.zeros(1)).all() + assert (high == numpy.ones(1)).all() + + def test_infer_target_shape(self): + """Check if it infers the shape of a transformed `Dimension`.""" + t = OneHotEncode(3) + assert t.infer_target_shape((2, 5)) == (2, 5, 3) + + t = OneHotEncode(2) + assert t.infer_target_shape((2, 5)) == (2, 5) + + t = OneHotEncode(1) + assert t.infer_target_shape((2, 5)) == (2, 5) + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = OneHotEncode(3) + assert t.repr_format("asfa") == "OneHotEncode(asfa)" + + +class TestLinearize(object): + """Test subclasses of `Linearize` transformation.""" + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do + what's expected. 
+ """ + t = Linearize() + assert t.domain_type == "real" + assert t.target_type == "real" + + def test_transform(self): + """Check if it transforms properly.""" + t = Linearize() + assert t.transform(numpy.e) == 1 + t.transform(0) + + def test_reverse(self): + """Check if it reverses `transform` properly.""" + t = Linearize() + assert t.reverse(1) == numpy.e + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + t = Linearize() + assert t.repr_format(1.0) == "Linearize(1.0)" + + +class TestView(object): + """Test subclasses of `View` transformation.""" + + def test_domain_and_target_type(self): + """Check if attribute-like `domain_type` and `target_type` do what's expected.""" + t = View(shape=None, index=None, domain_type="some fancy type") + assert t.domain_type == "some fancy type" + assert t.target_type == "some fancy type" + + def test_transform(self): + """Check if it transforms properly.""" + shape = (3, 4, 5) + index = (0, 2, 1) + t = View(shape=shape, index=index) + a = numpy.zeros(shape) + a[index] = 2 + assert t.transform(a) == 2 + + def test_reverse(self): + """Check if it reverses `transform` properly.""" + shape = (3, 4, 5) + index = (0, 2, 1) + a = numpy.zeros(shape) + a[index] = 2 + flattened = a.reshape(-1).tolist() + point = [None] + flattened + [None] + t = View(shape=shape, index=(0, 0, 0)) + numpy.testing.assert_equal(t.reverse(point, 1), a) + + def test_first(self): + """Test that views are correctly identified as first""" + shape = (3, 4, 5) + assert View(shape=shape, index=(0, 0, 0)).first + assert not View(shape=shape, index=(0, 1, 0)).first + + def test_repr_format(self): + """Check representation of a transformed dimension.""" + shape = (3, 4, 5) + index = (0, 2, 1) + t = View(shape=shape, index=index) + assert t.repr_format(1.0) == "View(shape=(3, 4, 5), index=(0, 2, 1), 1.0)" + + +@pytest.fixture() +def dim(): + """Create an example of `Dimension`.""" + dim = Real("yolo", "norm", 0.9, shape=(3, 2)) + 
return dim + + +@pytest.fixture() +def logdim(): + """Create an log example of `Dimension`.""" + dim = Real("yolo4", "reciprocal", 1.0, 10.0, shape=(3, 2)) + return dim + + +@pytest.fixture() +def logintdim(): + """Create an log integer example of `Dimension`.""" + dim = Integer("yolo5", "reciprocal", 1, 10, shape=(3, 2)) + return dim + + +@pytest.fixture() +def tdim(dim): + """Create an example of `TransformedDimension`.""" + transformers = [Quantize()] + tdim = TransformedDimension(Compose(transformers, dim.type), dim) + return tdim + + +@pytest.fixture() +def rdims(tdim): + """Create an example of `ReshapedDimension`.""" + transformations = {} + for index in itertools.product(*map(range, tdim.shape)): + key = f'{tdim.name}[{",".join(map(str, index))}]' + transformations[key] = ReshapedDimension( + transformer=View(tdim.shape, index, tdim.type), + original_dimension=tdim, + name=key, + index=0, + ) + + return transformations + + +@pytest.fixture() +def rdim(dim, rdims): + """Single ReshapedDimension""" + return rdims[f"{dim.name}[0,1]"] + + +@pytest.fixture() +def dim2(): + """Create a second example of `Dimension`.""" + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", "2", "3", "4") + categories = OrderedDict(zip(categories, probs)) + dim2 = Categorical("yolo2", categories, default_value="2") + return dim2 + + +@pytest.fixture() +def tdim2(dim2): + """Create a second example of `TransformedDimension`.""" + transformers = [Enumerate(dim2.categories), OneHotEncode(len(dim2.categories))] + tdim2 = TransformedDimension(Compose(transformers, dim2.type), dim2) + return tdim2 + + +@pytest.fixture() +def rdims2(tdim2): + """Create a categorical example of `ReshapedDimension`.""" + transformations = {} + for index in itertools.product(*map(range, tdim2.shape)): + key = f'{tdim2.name}[{",".join(map(str, index))}]' + transformations[key] = ReshapedDimension( + transformer=View(tdim2.shape, index, tdim2.type), + original_dimension=tdim2, + name=key, + index=1, + ) + + 
return transformations + + +@pytest.fixture() +def rdim2(dim2, rdims2): + """Single ReshapedDimension""" + return rdims2[f"{dim2.name}[1]"] + + +@pytest.fixture() +def dim3(): + """Create an example of integer `Dimension`.""" + return Integer("yolo3", "uniform", 3, 7, shape=(1,)) + + +@pytest.fixture() +def tdim3(dim3): + """Create an example of integer `Dimension`.""" + return TransformedDimension(Compose([], dim3.type), dim3) + + +@pytest.fixture() +def rdims3(tdim3): + """Create an example of integer `Dimension`.""" + rdim3 = ReshapedDimension( + transformer=View(tdim3.shape, (0,), tdim3.type), + original_dimension=tdim3, + name="yolo3[0]", + index=2, + ) + + return {tdim3.name: rdim3} + + +class TestTransformedDimension(object): + """Check functionality of class `TransformedDimension`.""" + + def test_transform(self, tdim): + """Check method `transform`.""" + assert tdim.transform(8.4) == 8 + assert tdim.transform(5.3) == 5 + assert numpy.all(tdim.transform([8.6, 5.3]) == numpy.array([9, 5], dtype=int)) + + def test_reverse(self, tdim): + """Check method `reverse`.""" + assert tdim.reverse(9) == 9.0 + assert tdim.reverse(5) == 5.0 + assert numpy.all(tdim.reverse([9, 5]) == numpy.array([9.0, 5.0], dtype=float)) + + def test_interval(self, tdim): + """Check method `interval`.""" + tmp1 = tdim.original_dimension._low + tmp2 = tdim.original_dimension._high + tdim.original_dimension._low = -0.6 + tdim.original_dimension._high = 1.2 + + assert tdim.interval() == (-1, 1) + + tdim.original_dimension._low = tmp1 + tdim.original_dimension._high = tmp2 + + def test_interval_from_categorical(self, tdim2): + """Check how we should treat interval when original dimension is categorical.""" + low, high = tdim2.interval() + assert (low == numpy.zeros(4)).all() + assert (high == numpy.ones(4)).all() + + def test_contains(self, tdim): + """Check method `__contains__`.""" + assert [[1, 1], [3, 1], [1, 2]] in tdim + + tmp1 = tdim.original_dimension._low + tmp2 = 
tdim.original_dimension._high + tdim.original_dimension._low = -0.6 + tdim.original_dimension._high = 1.2 + + assert [[1, 1], [3, 1], [1, 2]] not in tdim + + tdim.original_dimension._low = tmp1 + tdim.original_dimension._high = tmp2 + + def test_contains_from_categorical(self, tdim2): + """Check method `__contains__` when original is categorical.""" + assert (0, 0, 0, 1) in tdim2 + assert (0, 2, 0, 1) in tdim2 + assert (0, 2, 0) not in tdim2 + + def test_eq(self, tdim, tdim2): + """Return True if other is the same transformed dimension as self""" + assert tdim != tdim2 + assert tdim == copy.deepcopy(tdim) + + def test_hash(self, tdim, tdim2): + """Test that hash is consistent for identical and different transformed dimensions""" + assert hash(tdim) != hash(tdim2) + assert hash(tdim) == hash(copy.deepcopy(tdim)) + + def test_get_hashable_members(self, tdim, tdim2): + """Test that hashable members of the transformed dimensions are the aggregation of + transformer's and original dimension's hashable members. + """ + assert tdim._get_hashable_members() == ( + "Compose", + "Quantize", + "real", + "integer", + "Identity", + "real", + "real", + "yolo", + (3, 2), + "real", + (0.9,), + (), + None, + "norm", + ) + assert tdim2._get_hashable_members() == ( + "Compose", + "OneHotEncode", + "integer", + "real", + 4, + "Compose", + "Enumerate", + "categorical", + "integer", + "Identity", + "categorical", + "categorical", + "yolo2", + (), + "categorical", + (), + (), + "2", + "Distribution", + ) + + def test_validate(self, tdim, tdim2): + """Validate original_dimension""" + # It pass + tdim.validate() + tdim2.validate() + + # We break it + tdim.original_dimension._kwargs["size"] = (2,) + tdim2.original_dimension._default_value = "bad-default" + + # It does not pass + with pytest.raises(ValueError) as exc: + tdim.validate() + assert "Use 'shape' keyword only instead of 'size'." 
in str(exc.value) + + with pytest.raises(ValueError) as exc: + tdim2.validate() + assert "bad-default is not a valid value for this Dimension." in str(exc.value) + + tdim.original_dimension._kwargs.pop("size") + tdim2.original_dimension._default_value = Dimension.NO_DEFAULT_VALUE + + def test_repr(self, tdim): + """Check method `__repr__`.""" + assert ( + str(tdim) + == "Quantize(Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None))" + ) # noqa + + def test_name_property(self, tdim): + """Check property `name`.""" + assert tdim.name == "yolo" + + def test_type_property(self, tdim, tdim2): + """Check property `type`.""" + assert tdim.type == "integer" + assert tdim2.type == "real" + + def test_prior_name_property(self, tdim, tdim2): + """Check property `prior_name`.""" + assert tdim.prior_name == "norm" + assert tdim2.prior_name == "choices" + + def test_shape_property(self, tdim, tdim2): + """Check property `shape`.""" + assert tdim.original_dimension.shape == (3, 2) + assert tdim.shape == (3, 2) + assert tdim2.original_dimension.shape == () + assert tdim2.shape == (4,) + + def test_default_value_property(self, tdim, tdim2): + """Check property `default_value`.""" + assert tdim.default_value is Dimension.NO_DEFAULT_VALUE + assert tuple(tdim2.default_value) == (0, 1, 0, 0) + + +class TestReshapedDimension(object): + """Check functionality of class `ReshapedDimension`.""" + + def test_transform(self, rdim): + """Check method `transform`.""" + a = numpy.zeros((3, 2)) + a[0, 1] = 2 + assert rdim.transform(a) == 2 + + def test_reverse(self, rdim): + """Check method `reverse`.""" + a = numpy.zeros((3, 2)) + a[0, 1] = 2 + p = a.reshape(-1).tolist() + [None] + numpy.testing.assert_equal(rdim.reverse(p, 0), a) + + def test_interval(self, rdim): + """Check method `interval`.""" + assert rdim.interval() == ( + -numpy.array(numpy.inf).astype(int) + 1, + numpy.array(numpy.inf).astype(int) - 1, + ) + + def test_interval_from_categorical(self, rdim2): + 
"""Check how we should treat interval when original dimension is categorical.""" + assert rdim2.interval() == (0, 1) + + def test_eq(self, rdim, rdim2): + """Return True if other is the same transformed dimension as self""" + assert rdim != rdim2 + assert rdim == copy.deepcopy(rdim) + + def test_hash(self, rdim, rdim2): + """Test that hash is consistent for identical and different transformed dimensions""" + assert hash(rdim) != hash(rdim2) + assert hash(rdim) == hash(copy.deepcopy(rdim)) + + def test_get_hashable_members(self, rdim, rdim2): + """Test that hashable members of the transformed dimensions are the aggregation of + transformer's and original dimension's hashable members. + """ + assert rdim._get_hashable_members() == ( + "View", + "integer", + "integer", + "Compose", + "Quantize", + "real", + "integer", + "Identity", + "real", + "real", + "yolo", + (3, 2), + "real", + (0.9,), + (), + None, + "norm", + ) + assert rdim2._get_hashable_members() == ( + "View", + "real", + "real", + "Compose", + "OneHotEncode", + "integer", + "real", + 4, + "Compose", + "Enumerate", + "categorical", + "integer", + "Identity", + "categorical", + "categorical", + "yolo2", + (), + "categorical", + (), + (), + "2", + "Distribution", + ) + + def test_repr(self, rdim): + """Check method `__repr__`.""" + assert ( + str(rdim) + == "View(shape=(3, 2), index=(0, 1), Quantize(Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)))" + ) # noqa + + def test_name_property(self, rdim): + """Check property `name`.""" + assert rdim.name == "yolo[0,1]" + + def test_type_property(self, rdim, rdim2): + """Check property `type`.""" + assert rdim.type == "integer" + assert rdim2.type == "real" + + def test_prior_name_property(self, rdim, rdim2): + """Check property `prior_name`.""" + assert rdim.prior_name == "norm" + assert rdim2.prior_name == "choices" + + def test_shape_property(self, rdim, rdim2): + """Check property `shape`.""" + assert rdim.original_dimension.shape == 
(3, 2) + assert rdim.shape == () + assert rdim2.original_dimension.shape == (4,) + assert rdim2.shape == () + + def test_default_value_property(self, rdim, rdim2): + """Check property `default_value`.""" + assert rdim.default_value is Dimension.NO_DEFAULT_VALUE + assert rdim2.default_value == 1 + + +@pytest.fixture() +def space(dim, dim2, dim3): + """Create an example `Space`.""" + space = Space() + space.register(dim) + space.register(dim2) + space.register(dim3) + return space + + +@pytest.fixture() +def tspace(space, tdim, tdim2, tdim3): + """Create an example `TransformedSpace`.""" + tspace = TransformedSpace(space) + tspace.register(tdim) + tspace.register(tdim2) + tspace.register(tdim3) + return tspace + + +@pytest.fixture() +def rspace(tspace, rdims, rdims2, rdims3): + """Create an example `ReshapedSpace`.""" + rspace = ReshapedSpace(tspace) + for dim in itertools.chain(rdims.values(), rdims2.values(), rdims3.values()): + rspace.register(dim) + + return rspace + + +class TestTransformedSpace(object): + """Check functionality of class `TransformedSpace`.""" + + def test_extends_space(self, tspace): + """Check that `TransformedSpace` is actually a `Space`.""" + assert isinstance(tspace, Space) + + def test_transform(self, space, tspace, seed): + """Check method `transform`.""" + yo = space.sample(seed=seed)[0] + tyo = tspace.transform(yo) + assert tyo in tspace + + def test_reverse(self, space, tspace, seed): + """Check method `reverse`.""" + tyo = tspace.sample(seed=seed)[0] + yo = tspace.reverse(tyo) + assert yo in space + + def test_sample(self, space, tspace, seed): + """Check method `sample`.""" + points = tspace.sample(n_samples=2, seed=seed) + # pytest.set_trace() + assert len(points) == 2 + assert points[0] in tspace + assert points[1] in tspace + assert tspace.reverse(points[0]) in space + assert tspace.reverse(points[1]) in space + + +class TestReshapedSpace(object): + """Check functionality of class `ReshapeSpace`.""" + + def test_reverse(self, 
space, tspace, rspace, seed): + """Check method `reverse`.""" + ryo = format_trials.tuple_to_trial( + tuple( + numpy.zeros(tspace["yolo2"].shape).reshape(-1).tolist() + + [10] + + numpy.zeros(tspace["yolo"].shape).reshape(-1).tolist() + ), + rspace, + ) + yo = rspace.reverse(ryo) + assert yo in space + + def test_contains(self, tspace, rspace, seed): + """Check method `transform`.""" + ryo = format_trials.tuple_to_trial( + numpy.zeros(tspace["yolo2"].shape).reshape(-1).tolist() + + [10] + + numpy.zeros(tspace["yolo"].shape).reshape(-1).tolist(), + rspace, + ) + + assert ryo in rspace + + def test_transform(self, space, rspace, seed): + """Check method `transform`.""" + yo = space.sample(seed=seed)[0] + tyo = rspace.transform(yo) + assert tyo in rspace + + def test_sample(self, space, rspace, seed): + """Check method `sample`.""" + points = rspace.sample(n_samples=2, seed=seed) + assert len(points) == 2 + assert points[0] in rspace + assert points[1] in rspace + assert rspace.reverse(points[0]) in space + assert rspace.reverse(points[1]) in space + + def test_interval(self, rspace): + """Check method `interval`.""" + interval = rspace.interval() + + assert len(interval) == 3 * 2 + 4 + 1 + + # Test yolo2 + for i in range(4): + assert interval[i] == (0, 1) + + # Test yolo3 + assert interval[4] == (3, 10) + + # Test yolo[:, :] + for i in range(4 + 1, 4 + 1 + 3 * 2): + # assert interval[i] == (-float('inf'), float('inf')) + assert interval[i] == ( + -numpy.array(numpy.inf).astype(int) + 1, + numpy.array(numpy.inf).astype(int) - 1, + ) + + def test_reshape(self, space, rspace): + """Verify that the dimension are reshaped properly, forward and backward""" + trial = format_trials.tuple_to_trial( + (numpy.arange(6).reshape(3, 2).tolist(), "3", [10]), space + ) + + rtrial = format_trials.tuple_to_trial( + [0.0, 0.0, 1.0, 0.0] + + [10] + + numpy.array(trial.params["yolo"]).reshape(-1).tolist(), + rspace, + ) + assert rspace.transform(trial).params == rtrial.params + 
numpy.testing.assert_equal( + rspace.reverse(rtrial).params["yolo"], trial.params["yolo"] + ) + assert rspace.reverse(rtrial).params["yolo2"] == trial.params["yolo2"] + assert rspace.reverse(rtrial).params["yolo3"] == trial.params["yolo3"] + + def test_cardinality(self, dim2): + """Check cardinality of reshaped space""" + space = Space() + space.register(Real("yolo", "reciprocal", 0.1, 1, precision=1, shape=(2, 2))) + space.register(dim2) + + rspace = build_required_space(space, shape_requirement="flattened") + assert rspace.cardinality == (10 ** (2 * 2)) * 4 + + space = Space() + space.register(Real("yolo", "uniform", 0, 2, shape=(2, 2))) + space.register(dim2) + + rspace = build_required_space( + space, type_requirement="integer", shape_requirement="flattened" + ) + assert rspace.cardinality == (3 ** (2 * 2)) * 4 + + +@pytest.fixture() +def space_each_type(dim, dim2, dim3, logdim, logintdim): + """Create an example `Space`.""" + space = Space() + space.register(dim) + space.register(dim2) + space.register(dim3) + space.register(logdim) + space.register(logintdim) + return space + + +class TestRequiredSpaceBuilder(object): + """Check functionality of builder function `build_required_space`.""" + + @pytest.mark.xfail( + reason="Bring it back when testing new builder and extend to shape and dist" + ) + def test_not_supported_requirement(self, space_each_type): + """Require something which is not supported.""" + with pytest.raises(TypeError) as exc: + build_required_space(space_each_type, type_requirement="fasdfasf") + assert "Unsupported" in str(exc.value) + + def test_no_requirement(self, space_each_type): + """Check what is built using 'None' requirement.""" + tspace = build_required_space(space_each_type) + assert len(tspace) == 5 + assert tspace[0].type == "real" + assert tspace[1].type == "categorical" + # NOTE:HEAD + assert tspace[2].type == "integer" + assert tspace[3].type == "real" + assert tspace[4].type == "integer" + assert ( + str(tspace) + == """\ 
+Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)), + Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=2), + Integer(name=yolo3, prior={uniform: (3, 7), {}}, shape=(1,), default value=None), + Precision(4, Real(name=yolo4, prior={reciprocal: (1.0, 10.0), {}}, shape=(3, 2), default value=None)), + Integer(name=yolo5, prior={reciprocal: (1, 10), {}}, shape=(3, 2), default value=None)])\ +""" + ) # noqa + + def test_integer_requirement(self, space_each_type): + """Check what is built using 'integer' requirement.""" + tspace = build_required_space(space_each_type, type_requirement="integer") + assert len(tspace) == 5 + assert tspace[0].type == "integer" + assert tspace[1].type == "integer" + assert tspace[2].type == "integer" + assert tspace[3].type == "integer" + assert tspace[4].type == "integer" + assert ( + str(tspace) + == """\ +Space([Quantize(Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None))), + Enumerate(Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=2)), + Integer(name=yolo3, prior={uniform: (3, 7), {}}, shape=(1,), default value=None), + Quantize(Precision(4, Real(name=yolo4, prior={reciprocal: (1.0, 10.0), {}}, shape=(3, 2), default value=None))), + Integer(name=yolo5, prior={reciprocal: (1, 10), {}}, shape=(3, 2), default value=None)])\ +""" + ) # noqa + + def test_real_requirement(self, space_each_type): + """Check what is built using 'real' requirement.""" + tspace = build_required_space(space_each_type, type_requirement="real") + assert len(tspace) == 5 + assert tspace[0].type == "real" + assert tspace[1].type == "real" + assert tspace[2].type == "real" + assert tspace[3].type == "real" + assert tspace[4].type == "real" + assert ( + str(tspace) + == """\ +Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)), + 
OneHotEncode(Enumerate(Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=2))), + ReverseQuantize(Integer(name=yolo3, prior={uniform: (3, 7), {}}, shape=(1,), default value=None)), + Precision(4, Real(name=yolo4, prior={reciprocal: (1.0, 10.0), {}}, shape=(3, 2), default value=None)), + ReverseQuantize(Integer(name=yolo5, prior={reciprocal: (1, 10), {}}, shape=(3, 2), default value=None))])\ +""" + ) # noqa + + def test_numerical_requirement(self, space_each_type): + """Check what is built using 'integer' requirement.""" + tspace = build_required_space(space_each_type, type_requirement="numerical") + assert len(tspace) == 5 + assert tspace[0].type == "real" + assert tspace[1].type == "integer" + assert tspace[2].type == "integer" + assert tspace[3].type == "real" + assert tspace[4].type == "integer" + assert ( + str(tspace) + == """\ +Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)), + Enumerate(Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 0.40}, shape=(), default value=2)), + Integer(name=yolo3, prior={uniform: (3, 7), {}}, shape=(1,), default value=None), + Precision(4, Real(name=yolo4, prior={reciprocal: (1.0, 10.0), {}}, shape=(3, 2), default value=None)), + Integer(name=yolo5, prior={reciprocal: (1, 10), {}}, shape=(3, 2), default value=None)])\ +""" + ) # noqa + + def test_linear_requirement(self, space_each_type): + """Check what is built using 'linear' requirement.""" + tspace = build_required_space(space_each_type, dist_requirement="linear") + assert len(tspace) == 5 + assert tspace[0].type == "real" + assert tspace[1].type == "categorical" + assert tspace[2].type == "integer" + assert tspace[3].type == "real" + assert tspace[4].type == "real" + assert ( + str(tspace) + == """\ +Space([Precision(4, Real(name=yolo, prior={norm: (0.9,), {}}, shape=(3, 2), default value=None)), + Categorical(name=yolo2, prior={asdfa: 0.10, 2: 0.20, 3: 0.30, 4: 
0.40}, shape=(), default value=2), + Integer(name=yolo3, prior={uniform: (3, 7), {}}, shape=(1,), default value=None), + Linearize(Precision(4, Real(name=yolo4, prior={reciprocal: (1.0, 10.0), {}}, shape=(3, 2), default value=None))), + Linearize(ReverseQuantize(Integer(name=yolo5, prior={reciprocal: (1, 10), {}}, shape=(3, 2), default value=None)))])\ +""" + ) # noqa + + def test_flatten_requirement(self, space_each_type): + """Check what is built using 'flatten' requirement.""" + tspace = build_required_space(space_each_type, shape_requirement="flattened") + + # 1 integer + 1 categorical + 1 * (3, 2) shapes + assert len(tspace) == 1 + 1 + 3 * (3 * 2) + assert str(tspace).count("View") == 3 * (3 * 2) + 1 + + i = 0 + + assert tspace[i].type == "categorical" + i += 1 + + assert tspace[i].type == "integer" + i += 1 + + for _ in range(3 * 2): + assert tspace[i].type == "real" + i += 1 + + for _ in range(3 * 2): + assert tspace[i].type == "integer" + i += 1 + + for _ in range(3 * 2): + assert tspace[i].type == "real" + i += 1 + + tspace = build_required_space( + space_each_type, shape_requirement="flattened", type_requirement="real" + ) + + # 1 integer + 4 categorical + 1 * (3, 2) shapes + assert len(tspace) == 1 + 4 + 3 * (3 * 2) + assert str(tspace).count("View") == 4 + 3 * (3 * 2) + 1 + + def test_capacity(self, space_each_type): + """Check transformer space capacity""" + tspace = build_required_space(space_each_type, type_requirement="real") + assert tspace.cardinality == numpy.inf + + space = Space() + probs = (0.1, 0.2, 0.3, 0.4) + categories = ("asdfa", 2, 3, 4) + dim = Categorical("yolo", OrderedDict(zip(categories, probs)), shape=2) + space.register(dim) + dim = Integer("yolo2", "uniform", -3, 6) + space.register(dim) + tspace = build_required_space(space, type_requirement="integer") + assert tspace.cardinality == (4 ** 2) * (6 + 1) + + dim = Integer("yolo3", "uniform", -3, 6, shape=(2, 1)) + space.register(dim) + tspace = build_required_space(space, 
type_requirement="integer") + assert tspace.cardinality == (4 ** 2) * (6 + 1) * ((6 + 1) ** (2 * 1)) + + tspace = build_required_space( + space, type_requirement="integer", shape_requirement="flattened" + ) + assert tspace.cardinality == (4 ** 2) * (6 + 1) * ((6 + 1) ** (2 * 1)) + + tspace = build_required_space( + space, type_requirement="integer", dist_requirement="linear" + ) + assert tspace.cardinality == (4 ** 2) * (6 + 1) * ((6 + 1) ** (2 * 1)) + + +def test_quantization_does_not_violate_bounds(): + """Regress on bug that converts valid float in tdim to non valid excl. upper bound.""" + dim = Integer("yo", "uniform", 3, 7) + transformers = [Reverse(Quantize())] + tdim = TransformedDimension(Compose(transformers, dim.type), dim) + assert 11 not in dim + assert 10 in dim + # but be careful, because upper bound is inclusive + assert 11.5 not in tdim + # rounded to 11 + assert 10.6 not in tdim + # rounded to 10 + assert 10.4 in tdim + assert tdim.reverse(9.6) in dim + assert tdim.reverse(9.6) == 10 + assert tdim.reverse(9.4) == 9 + + +def test_precision_with_linear(space, logdim, logintdim): + """Test that precision isn't messed up by linearization.""" + space.register(logdim) + space.register(logintdim) + + # Force precision on all real or linearized dimensions + space["yolo"].precision = 3 + space["yolo4"].precision = 4 + space["yolo5"].precision = 5 + + # Create a point + trial = space.sample(1)[0] + real_index = list(space.keys()).index("yolo") + logreal_index = list(space.keys()).index("yolo4") + logint_index = list(space.keys()).index("yolo5") + trial._params[real_index].value = 0.133333 + trial._params[logreal_index].value = 0.1222222 + trial._params[logint_index].value = 2 + + # Check first without linearization + tspace = build_required_space(space, type_requirement="numerical") + # Check that transform is fine + ttrial = tspace.transform(trial) + assert ttrial.params["yolo"] == 0.133 + assert ttrial.params["yolo4"] == 0.1222 + assert 
ttrial.params["yolo5"] == 2 + + # Check that reverse does not break precision + rtrial = tspace.reverse(ttrial) + assert rtrial.params["yolo"] == 0.133 + assert rtrial.params["yolo4"] == 0.1222 + assert rtrial.params["yolo5"] == 2 + + # Check with linearization + tspace = build_required_space( + space, dist_requirement="linear", type_requirement="real" + ) + # Check that transform is fine + ttrial = tspace.transform(trial) + assert ttrial.params["yolo"] == 0.133 + assert ttrial.params["yolo4"] == numpy.log(0.1222) + assert ttrial.params["yolo5"] == numpy.log(2) + + # Check that reverse does not break precision + rtrial = tspace.reverse(ttrial) + assert rtrial.params["yolo"] == 0.133 + assert rtrial.params["yolo4"] == 0.1222 + assert rtrial.params["yolo5"] == 2 + + +def test_change_trial_params(space, rspace): + working_dir = "/new/working/dir" + status = "interrupted" + + rtrial = rspace.sample()[0] + # Sampling a new point in original space instead of using reverse() + trial = space.sample()[0] + point = format_trials.trial_to_tuple(trial, space) + + rtrial.exp_working_dir = working_dir + rtrial.status = status + + restored_trial = change_trial_params(rtrial, point, space) + + # Test that attributes are conserved + assert restored_trial.exp_working_dir == working_dir + assert restored_trial.status == status + + # Test params are updated + assert restored_trial.params != rtrial.params + assert all([a == b for a, b in zip(restored_trial.params, trial.params)]) + diff --git a/tests/test_space.py b/tests/test_space.py index bd80d99..950958c 100644 --- a/tests/test_space.py +++ b/tests/test_space.py @@ -3,7 +3,7 @@ import json import pytest -from orion.core.worker.transformer import build_required_space +from sspace.orion.transformer import build_required_space from sspace import Space, both, contains, either, eq, gt, lt, ne from sspace.convert import build_space, convert_space diff --git a/tox.ini b/tox.ini index 16c4a64..51a75a2 100644 --- a/tox.ini +++ b/tox.ini @@ 
-22,7 +22,7 @@ PLATFORM = [testenv] description = Run tests with coverage with pytest under current Python env usedevelop = true -setenv = COVERAGE_FILE=.coverage.{envname} +setenv = COVERAGE_FILE=.coverage_{envname} passenv = CI deps = -rtests/requirements.txt