From 8758423b39378cdf9f3feb5333a9fd70b3ce17b4 Mon Sep 17 00:00:00 2001 From: Tyler Date: Sat, 18 Mar 2023 20:01:25 -0500 Subject: [PATCH 01/36] added some files copied from parmoo.sim_funcs and Ackley C-BBO --- lib/DTLZ/__init__.py | 0 lib/DTLZ/benchmark.py | 10 + lib/DTLZ/dtlz.py | 776 ++++++++++++++++++++++++++++++++++++++++++ lib/DTLZ/hpo.py | 47 +++ 4 files changed, 833 insertions(+) create mode 100644 lib/DTLZ/__init__.py create mode 100644 lib/DTLZ/benchmark.py create mode 100644 lib/DTLZ/dtlz.py create mode 100644 lib/DTLZ/hpo.py diff --git a/lib/DTLZ/__init__.py b/lib/DTLZ/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/DTLZ/benchmark.py b/lib/DTLZ/benchmark.py new file mode 100644 index 0000000..1c1a399 --- /dev/null +++ b/lib/DTLZ/benchmark.py @@ -0,0 +1,10 @@ +import os + +from deephyper_benchmark import * + +DIR = os.path.dirname(os.path.abspath(__file__)) + + +class DTLZ_lib(Benchmark): + + version = "0.0.1" diff --git a/lib/DTLZ/dtlz.py b/lib/DTLZ/dtlz.py new file mode 100644 index 0000000..7b6f450 --- /dev/null +++ b/lib/DTLZ/dtlz.py @@ -0,0 +1,776 @@ +""" This module contains simulation function implementations of the DTLZ test +suite, as described in: + +Deb, Thiele, Laumanns, and Zitzler. "Scalable test problems for +evolutionary multiobjective optimization" in Evolutionary Multiobjective +Optimization, Theoretical Advances and Applications, Ch. 6 (pp. 105--145). +Springer-Verlag, London, UK, 2005. Abraham, Jain, and Goldberg (Eds). + +One drawback of the original DTLZ problems was that their global minima +(Pareto points) always corresponded to design points that satisfy + +x_i = 0.5, for i = number of objectives, ..., number of design points + +or + +x_i = 0, for i = number of objectives, ..., number of design points. + +This was appropriate for testing evolutionary algorithms, but for many +deterministic algorithms, these solutions may represent either the +best- or worst-case scenarios. 
+ +To make these problems applicable for deterministic algorithms, the +solution sets must be configurable offset by a user-specified amount, +as proposed in: + +Chang. Mathematical Software for Multiobjective Optimization Problems. +Ph.D. dissertation, Virginia Tech, Dept. of Computer Science, 2020. + +For the problems DTLZ8 and DTLZ9, only objective outputs are given +by the simulation function herein. To fully define the problem, also +use one or more of the corresponding constraint classes included in +``parmoo.constraints.dtlz`` [NOT YET IMPLEMENTED]. + +The full list of simulation functions in this module includes the kernel +functions: + * ``g1`` + * ``g2`` + * ``g3`` + * ``g4`` + +and the 9 DTLZ problems in simulation form, with each simulation output +corresponding to an objective: + * ``dtlz1`` + * ``dtlz2`` + * ``dtlz3`` + * ``dtlz4`` + * ``dtlz5`` + * ``dtlz6`` + * ``dtlz7`` + * ``dtlz8`` + * ``dtlz9`` + +""" + +from parmoo.simulations import sim_func +from parmoo.util import unpack +import numpy as np + + +class g1(): + """ Class defining 1 of 4 kernel functions used in the DTLZ problem suite. + + g1 = 100 ( (n - o + 1) + + sum_{i=o}^n ((x_i - offset)^2 - cos(20pi(x_i - offset))) ) + + Contains 2 methods: + * ``__init__(des, num_obj)`` + * ``__call__(x)`` + + The ``__init__`` method creates a new kernel. + + The ``__call__`` method performs an evaluation of the g1 kernel. + + """ + + def __init__(self, num_des, num_obj=3, offset=0.5): + """ Constructor for g1 class. + + Args: + num_des (int): The number of design variables. + + num_obj (int, optional): The number of objectives. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.5. + + """ + + self.n = num_des + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. 
+ + Args: + x (numpy.array): A numpy.ndarray containing the design point + to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Calculate output + result = (1 + self.n - self.o + + np.sum((x[self.o-1:self.n] - self.offset) ** 2 - + np.cos(20.0 * np.pi * + (x[self.o-1:self.n] - self.offset)))) * 100.0 + return result + + +class g2(sim_func): + """ Class defining 2 of 4 kernel functions used in the DTLZ problem suite. + + g2 = (x_o - offset)^2 + ... + (x_n - offset)^2 + + Contains 2 methods: + * ``__init__(des)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the g2 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.5): + """ Constructor for g2 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used to calculate the value of g2. Note that regardless of + the number of objectives, the number of simulation + outputs from g2 is always 1. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.5. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + return np.array([np.sum((xx[self.o-1:self.n] - self.offset) ** 2)]) + + +class g3(sim_func): + """ Class defining 3 of 4 kernel functions used in the DTLZ problem suite. 
+ + g3 = |x_o - offset|^.1 + ... + |x_n - offset|^.1 + + Contains 2 methods: + * ``__init__(des)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the g3 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.0): + """ Constructor for g3 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used to calculate the value of g3. Note that regardless of + the number of objectives, the number of simulation + outputs from g3 is always 1. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.0. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + return np.array([np.sum(np.abs(xx[self.o-1:self.n] - self.offset) + ** 0.1)]) + + +class g4(sim_func): + """ Class defining 4 of 4 kernel functions used in the DTLZ problem suite. + + g4 = 1 + (9 * (|x_o - offset| + ... + |x_n - offset|) / (n + 1 - o)) + + Contains 2 methods: + * ``__init__(des)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the g4 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.0): + """ Constructor for g4 class. 
+ + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used to calculate the value of g4. Note that regardless of + the number of objectives, the number of simulation + outputs from g4 is always 1. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.0. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + return np.array([(9 * np.sum(np.abs(xx[self.o-1:self.n] - self.offset)) + / float(self.n + 1 - self.o)) + 1.0]) + + +class dtlz1(sim_func): + """ Class defining the DTLZ1 problem with offset minimizer. + + DTLZ1 has a linear Pareto front, with all nondominated points + on the hyperplane F_1 + F_2 + ... + F_o = 0.5. + DTLZ1 has 11^k - 1 "local" Pareto fronts where k = n - o + 1, and + 1 "global" Pareto front. + + Contains 2 methods: + * ``__init__(des)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ1 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.5): + """ Constructor for DTLZ1 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. 
+ + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.5. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g1(self.n, self.o, self.offset) + # Initialize output array + fx = np.zeros(self.o) + fx[:] = (1.0 + ker(xx)[0]) / 2.0 + # Calculate the output array + for i in range(self.o): + for j in range(self.o - 1 - i): + fx[i] *= xx[j] + if i > 0: + fx[i] *= (1.0 - xx[self.o - 1 - i]) + return fx + + +class dtlz2(sim_func): + """ Class defining the DTLZ2 problem with offset minimizer. + + DTLZ2 has a concave Pareto front, given by the unit sphere in + objective space, restricted to the positive orthant. + DTLZ2 has no "local" Pareto fronts, besides the true Pareto front. + + Contains 2 methods: + * ``__init__(des, sim)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ2 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.5): + """ Constructor for DTLZ2 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.5. 
+ + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g2(self.n, self.o, self.offset) + # Initialize output array + fx = np.zeros(self.o) + fx[:] = (1.0 + ker(xx)[0]) + # Calculate the output array + for i in range(self.o): + for j in range(self.o - 1 - i): + fx[i] *= np.cos(np.pi * xx[j] / 2) + if i > 0: + fx[i] *= np.sin(np.pi * xx[self.o - 1 - i] / 2) + return fx + + +class dtlz3(sim_func): + """ Class defining the DTLZ3 problem with offset minimizer. + + DTLZ3 has a concave Pareto front, given by the unit sphere in + objective space, restricted to the positive orthant. + DTLZ3 has 3^k - 1 "local" Pareto fronts where k = n - o + 1, and + 1 "global" Pareto front. + + Contains 2 methods: + * ``__init__(des, sim)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ3 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.5): + """ Constructor for DTLZ3 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.5. 
+ + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g1(self.n, self.o, self.offset) + # Initialize output array + fx = np.zeros(self.o) + fx[:] = (1.0 + ker(xx)[0]) + # Calculate the output array + for i in range(self.o): + for j in range(self.o - 1 - i): + fx[i] *= np.cos(np.pi * xx[j] / 2) + if i > 0: + fx[i] *= np.sin(np.pi * xx[self.o - 1 - i] / 2) + return fx + + +class dtlz4(sim_func): + """ Class defining the DTLZ4 problem with offset minimizer. + + DTLZ4 has a concave Pareto front, given by the unit sphere in + objective space, restricted to the positive orthant. + DTLZ4 has no "local" Pareto fronts, besides the true Pareto front, + but by tuning the optional parameter alpha, one can adjust the + solution density, making it harder for MOO algorithms to produce + a uniform distribution of solutions. + + Contains 2 methods: + * ``__init__(des, sim)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ4 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.5, alpha=100.0): + """ Constructor for DTLZ4 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. 
The default value is offset = 0.5. + + alpha (optional, float or int): The uniformity parameter used for + controlling the uniformity of the distribution of solutions + across the Pareto front. Must be greater than or equal to 1. + A value of 1 results in DTLZ2. Default value is 100.0. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + self.alpha = alpha + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g2(self.n, self.o, self.offset) + # Initialize output array + fx = np.zeros(self.o) + fx[:] = (1.0 + ker(xx)[0]) + # Calculate the output array + for i in range(self.o): + for j in range(self.o - 1 - i): + fx[i] *= np.cos(np.pi * xx[j] ** self.alpha / 2) + if i > 0: + fx[i] *= np.sin(np.pi * xx[self.o - 1 - i] ** self.alpha / 2) + return fx + + +class dtlz5(sim_func): + """ Class defining the DTLZ5 problem with offset minimizer. + + DTLZ5 has a lower-dimensional Pareto front embedded in the objective + space, given by an arc of the unit sphere in the positive orthant. + DTLZ5 has no "local" Pareto fronts, besides the true Pareto front. + + Contains 2 methods: + * ``__init__(des, sim)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ5 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.5): + """ Constructor for DTLZ5 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. 
+ + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.5. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g2(self.n, self.o, self.offset) + # Calculate theta values + theta = np.zeros(self.o - 1) + g2x = ker(xx) + for i in range(self.o - 1): + theta[i] = np.pi * (1 + 2 * g2x * xx[i]) / (4 * (1 + g2x)) + # Initialize output array + fx = np.zeros(self.o) + fx[:] = (1.0 + g2x) + # Calculate the output array + for i in range(self.o): + for j in range(self.o - 1 - i): + fx[i] *= np.cos(np.pi * theta[j] / 2) + if i > 0: + fx[i] *= np.sin(np.pi * theta[self.o - 1 - i] / 2) + return fx + + +class dtlz6(sim_func): + """ Class defining the DTLZ6 problem with offset minimizer. + + DTLZ6 has a lower-dimensional Pareto front embedded in the objective + space, given by an arc of the unit sphere in the positive orthant. + DTLZ6 has no "local" Pareto fronts, but tends to show very little + improvement until the algorithm is very close to its solution set. + + Contains 2 methods: + * ``__init__(des, sim)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ6 problem. + + """ + + def __init__(self, des, num_obj=3, offset=0.0): + """ Constructor for DTLZ6 class. 
+ + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.0. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g3(self.n, self.o, self.offset) + # Calculate theta values + theta = np.zeros(self.o - 1) + g3x = ker(xx) + for i in range(self.o - 1): + theta[i] = np.pi * (1 + 2 * g3x * xx[i]) / (4 * (1 + g3x)) + # Initialize output array + fx = np.zeros(self.o) + fx[:] = (1.0 + g3x) + # Calculate the output array + for i in range(self.o): + for j in range(self.o - 1 - i): + fx[i] *= np.cos(np.pi * theta[j] / 2) + if i > 0: + fx[i] *= np.sin(np.pi * theta[self.o - 1 - i] / 2) + return fx + + +class dtlz7(sim_func): + """ Class defining the DTLZ7 problem with offset minimizer. + + DTLZ7 has a discontinuous Pareto front, with solutions on the + 2^(o-1) discontinuous nondominated regions of the surface: + + F_m = o - F_1 (1 + sin(3pi F_1)) - ... - F_{o-1} (1 + sin3pi F_{o-1}). + + Contains 2 methods: + * ``__init__(des, sim)`` + * ``__call__(x)`` + + The ``__init__`` method inherits from the sim_func ABC. + + The ``__call__`` method performs an evaluation of the DTLZ7 problem. 
+ + """ + + def __init__(self, des, num_obj=3, offset=0.0): + """ Constructor for DTLZ7 class. + + Args: + des (np.dtype or int): Either the numpy.dtype of the + design variables or the number of design variables, + assumed to all be continuous and unnamed. + + num_obj (int, optional): The number of objectives, which is + used as the number of simulation outputs. + + offset (optional, float): The location of the global minimizers + is x_i = offset for i = number of objectives, ..., number of + design variables. The default value is offset = 0.0. + + """ + + super().__init__(des) + self.o = num_obj + self.offset = offset + return + + def __call__(self, x): + """ Define simulation evaluation. + + Args: + x (numpy.array): A numpy.ndarray (unnamed) or numpy structured + array (named), containing the design point to evaluate. + + Returns: + numpy.ndarray: The output of this simulation for the input x. + + """ + + # Extract x into xx, if names are used + xx = unpack(x, self.des_type) + # Initialize kernel function + ker = g4(self.n, self.o, self.offset) + # Initialize first o-1 entries in the output array + fx = np.zeros(self.o) + print(xx) + fx[:self.o-1] = xx[:self.o-1] + # Calculate kernel functions + gx = 1.0 + ker(xx) + hx = (-np.sum(xx[:self.o-1] * + (1.0 + np.sin(3.0 * np.pi * xx[:self.o-1]) / gx)) + + float(self.o)) + # Calculate the last entry in the output array + fx[self.o-1] = gx * hx + return fx diff --git a/lib/DTLZ/hpo.py b/lib/DTLZ/hpo.py new file mode 100644 index 0000000..533a8f7 --- /dev/null +++ b/lib/DTLZ/hpo.py @@ -0,0 +1,47 @@ +import os + +import time +import numpy as np +from deephyper.problem import HpProblem +from deephyper.evaluator import profile, RunningJob +from .dtlz import dtlz1, dtlz2, dtlz3, dtlz4, dtlz5, dtlz6, dtlz7 + +nb_dim = os.environ.get("DEEPHYPER_BENCHMARK_NDIMS", 5) +domain = (-32.768, 32.768) +problem = HpProblem() +for i in range(nb_dim): + problem.add_hyperparameter(domain, f"x{i}") + + +def ackley(x, a=20, b=0.2, c=2 * 
np.pi): + d = len(x) + s1 = np.sum(x**2) + s2 = np.sum(np.cos(c * x)) + term1 = -a * np.exp(-b * np.sqrt(s1 / d)) + term2 = -np.exp(s2 / d) + y = term1 + term2 + a + np.exp(1) + return y + + +@profile +def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: + + config = job.parameters + + if sleep: + t_sleep = np.random.normal(loc=sleep_mean, scale=sleep_noise) + t_sleep = max(t_sleep, 0) + time.sleep(t_sleep) + + x = np.array([config[k] for k in config if "x" in k]) + x = np.asarray_chkfinite(x) # ValueError if any NaN or Inf + + return -ackley(x) + + +if __name__ == "__main__": + print(problem) + default_config = problem.default_configuration + print(f"{default_config=}") + result = run(RunningJob(parameters=default_config)) + print(f"{result=}") From bb6e2194b11758956eeb8dc1d94d85d7474e4eb1 Mon Sep 17 00:00:00 2001 From: Tyler Date: Sun, 19 Mar 2023 15:58:58 -0500 Subject: [PATCH 02/36] modified dtlz from parmoo interface to match deephyper benchmark interface, modified hpo file to call dtlz suite --- lib/DTLZ/dtlz.py | 543 ++++++++++++++++------------------------------- lib/DTLZ/hpo.py | 23 +- 2 files changed, 194 insertions(+), 372 deletions(-) diff --git a/lib/DTLZ/dtlz.py b/lib/DTLZ/dtlz.py index 7b6f450..06e82e7 100644 --- a/lib/DTLZ/dtlz.py +++ b/lib/DTLZ/dtlz.py @@ -1,45 +1,29 @@ -""" This module contains simulation function implementations of the DTLZ test -suite, as described in: +""" This module contains objective function implementations of the DTLZ test +suite, derived from the implementations in ParMOO: + +Chang and Wild. "ParMOO: A Python library for parallel multiobjective +simulation optimization." Journal of Open Source Software 8(82):4468, 2023. + +------------------------------------------------------------------------------ + +For further references, the DTLZ test suite was originally proposed in: Deb, Thiele, Laumanns, and Zitzler. 
"Scalable test problems for evolutionary multiobjective optimization" in Evolutionary Multiobjective Optimization, Theoretical Advances and Applications, Ch. 6 (pp. 105--145). Springer-Verlag, London, UK, 2005. Abraham, Jain, and Goldberg (Eds). -One drawback of the original DTLZ problems was that their global minima -(Pareto points) always corresponded to design points that satisfy - -x_i = 0.5, for i = number of objectives, ..., number of design points - -or +The original implementation was appropriate for testing randomized algorithms, +but for many deterministic algorithms, the global solutions represent either +best- or worst-case scenarios, so an configurable offset was introduced in: -x_i = 0, for i = number of objectives, ..., number of design points. - -This was appropriate for testing evolutionary algorithms, but for many -deterministic algorithms, these solutions may represent either the -best- or worst-case scenarios. - -To make these problems applicable for deterministic algorithms, the -solution sets must be configurable offset by a user-specified amount, -as proposed in: - -Chang. Mathematical Software for Multiobjective Optimization Problems. +Chang. "Mathematical Software for Multiobjective Optimization Problems." Ph.D. dissertation, Virginia Tech, Dept. of Computer Science, 2020. -For the problems DTLZ8 and DTLZ9, only objective outputs are given -by the simulation function herein. To fully define the problem, also -use one or more of the corresponding constraint classes included in -``parmoo.constraints.dtlz`` [NOT YET IMPLEMENTED]. 
- -The full list of simulation functions in this module includes the kernel -functions: - * ``g1`` - * ``g2`` - * ``g3`` - * ``g4`` +------------------------------------------------------------------------------ -and the 9 DTLZ problems in simulation form, with each simulation output -corresponding to an objective: +The full list of public classes in this module includes the 7 unconstrained +DTLZ problems: * ``dtlz1`` * ``dtlz2`` * ``dtlz3`` @@ -52,29 +36,14 @@ """ -from parmoo.simulations import sim_func -from parmoo.util import unpack import numpy as np -class g1(): - """ Class defining 1 of 4 kernel functions used in the DTLZ problem suite. - - g1 = 100 ( (n - o + 1) + - sum_{i=o}^n ((x_i - offset)^2 - cos(20pi(x_i - offset))) ) - - Contains 2 methods: - * ``__init__(des, num_obj)`` - * ``__call__(x)`` - - The ``__init__`` method creates a new kernel. - - The ``__call__`` method performs an evaluation of the g1 kernel. - - """ +class __dtlz_base__(): + """ Base class implements re-used constructor """ def __init__(self, num_des, num_obj=3, offset=0.5): - """ Constructor for g1 class. + """ Constructor for all DTLZ classes. Args: num_des (int): The number of design variables. @@ -82,8 +51,8 @@ def __init__(self, num_des, num_obj=3, offset=0.5): num_obj (int, optional): The number of objectives. offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.5. + is x_i = offset for i = num_objectives, ..., num_des. + The default offset is 0.5. """ @@ -93,196 +62,169 @@ def __init__(self, num_des, num_obj=3, offset=0.5): return def __call__(self, x): - """ Define simulation evaluation. + raise NotImplementedError("The call method must be implemented...") + + +class __g1__(__dtlz_base__): + """ Class defining 1 of 4 kernel functions used in the DTLZ problem suite. 
+ + g1 = 100 ( (n - o + 1) + + sum_{i=o}^n ((x_i - offset)^2 - cos(20pi(x_i - offset))) ) + + Contains 2 methods: + * ``__init__(num_des, num_obj)`` + * ``__call__(x)`` + + The ``__init__`` method creates a new kernel. + + The ``__call__`` method performs an evaluation of the g1 kernel. + + """ + + def __call__(self, x): + """ Define objective evaluation. Args: x (numpy.array): A numpy.ndarray containing the design point to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + float: The output of this objective for the input x. """ - # Calculate output - result = (1 + self.n - self.o + - np.sum((x[self.o-1:self.n] - self.offset) ** 2 - - np.cos(20.0 * np.pi * - (x[self.o-1:self.n] - self.offset)))) * 100.0 - return result + return (1 + self.n - self.o + + np.sum((x[self.o-1:self.n] - self.offset) ** 2 - + np.cos(20.0 * np.pi * + (x[self.o-1:self.n] - self.offset)))) * 100.0 -class g2(sim_func): +class __g2__(__dtlz_base__): """ Class defining 2 of 4 kernel functions used in the DTLZ problem suite. g2 = (x_o - offset)^2 + ... + (x_n - offset)^2 Contains 2 methods: - * ``__init__(des)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the g2 problem. """ - def __init__(self, des, num_obj=3, offset=0.5): - """ Constructor for g2 class. - - Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. - - num_obj (int, optional): The number of objectives, which is - used to calculate the value of g2. Note that regardless of - the number of objectives, the number of simulation - outputs from g2 is always 1. - - offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. 
The default value is offset = 0.5. - - """ - - super().__init__(des) - self.o = num_obj - self.offset = offset - return - def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.array): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + float: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) - return np.array([np.sum((xx[self.o-1:self.n] - self.offset) ** 2)]) + return np.sum((x[self.o-1:self.n] - self.offset) ** 2) -class g3(sim_func): +class __g3__(__dtlz_base__): """ Class defining 3 of 4 kernel functions used in the DTLZ problem suite. g3 = |x_o - offset|^.1 + ... + |x_n - offset|^.1 Contains 2 methods: - * ``__init__(des)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the g3 problem. """ - def __init__(self, des, num_obj=3, offset=0.0): - """ Constructor for g3 class. + def __init__(self, num_des, num_obj=3, offset=0.0): + """ Constructor for g3, with modified default offset. Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. + num_des (int): The number of design variables. - num_obj (int, optional): The number of objectives, which is - used to calculate the value of g3. Note that regardless of - the number of objectives, the number of simulation - outputs from g3 is always 1. + num_obj (int, optional): The number of objectives. 
offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.0. + is x_i = offset for i = num_objectives, ..., num_des. + The default offset is 0.0. """ - super().__init__(des) - self.o = num_obj - self.offset = offset + super().__init__(num_des=num_des, num_obj=num_obj, offset=offset) return def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.array): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + float: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) - return np.array([np.sum(np.abs(xx[self.o-1:self.n] - self.offset) - ** 0.1)]) + return np.sum(np.abs(x[self.o-1:self.n] - self.offset) ** 0.1) -class g4(sim_func): +class __g4__(__dtlz_base__): """ Class defining 4 of 4 kernel functions used in the DTLZ problem suite. g4 = 1 + (9 * (|x_o - offset| + ... + |x_n - offset|) / (n + 1 - o)) Contains 2 methods: - * ``__init__(des)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the g4 problem. """ - def __init__(self, des, num_obj=3, offset=0.0): - """ Constructor for g4 class. + def __init__(self, num_des, num_obj=3, offset=0.0): + """ Constructor for g4, with modified default offset. Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. + num_des (int): The number of design variables. 
- num_obj (int, optional): The number of objectives, which is - used to calculate the value of g4. Note that regardless of - the number of objectives, the number of simulation - outputs from g4 is always 1. + num_obj (int, optional): The number of objectives. offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.0. + is x_i = offset for i = num_objectives, ..., num_des. + The default offset is 0.0. """ - super().__init__(des) - self.o = num_obj - self.offset = offset + super().__init__(num_des=num_des, num_obj=num_obj, offset=offset) return def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.array): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + float: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) - return np.array([(9 * np.sum(np.abs(xx[self.o-1:self.n] - self.offset)) - / float(self.n + 1 - self.o)) + 1.0]) + return (9 * np.sum(np.abs(x[self.o-1:self.n] - self.offset)) + / float(self.n + 1 - self.o)) + 1.0 -class dtlz1(sim_func): +class dtlz1(__dtlz_base__): """ Class defining the DTLZ1 problem with offset minimizer. DTLZ1 has a linear Pareto front, with all nondominated points @@ -291,66 +233,42 @@ class dtlz1(sim_func): 1 "global" Pareto front. Contains 2 methods: - * ``__init__(des)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ1 problem. 
""" - def __init__(self, des, num_obj=3, offset=0.5): - """ Constructor for DTLZ1 class. - - Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. - - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. - - offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.5. - - """ - - super().__init__(des) - self.o = num_obj - self.offset = offset - return - def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) # Initialize kernel function - ker = g1(self.n, self.o, self.offset) + ker = __g1__(self.n, self.o, self.offset) # Initialize output array fx = np.zeros(self.o) - fx[:] = (1.0 + ker(xx)[0]) / 2.0 + fx[:] = (1.0 + ker(x)) / 2.0 # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= xx[j] + fx[i] *= x[j] if i > 0: - fx[i] *= (1.0 - xx[self.o - 1 - i]) + fx[i] *= (1.0 - x[self.o - 1 - i]) return fx -class dtlz2(sim_func): +class dtlz2(__dtlz_base__): """ Class defining the DTLZ2 problem with offset minimizer. DTLZ2 has a concave Pareto front, given by the unit sphere in @@ -358,66 +276,42 @@ class dtlz2(sim_func): DTLZ2 has no "local" Pareto fronts, besides the true Pareto front. 
Contains 2 methods: - * ``__init__(des, sim)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ2 problem. """ - def __init__(self, des, num_obj=3, offset=0.5): - """ Constructor for DTLZ2 class. - - Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. - - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. - - offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.5. - - """ - - super().__init__(des) - self.o = num_obj - self.offset = offset - return - def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. 
""" - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) # Initialize kernel function - ker = g2(self.n, self.o, self.offset) + ker = __g2__(self.n, self.o, self.offset) # Initialize output array fx = np.zeros(self.o) - fx[:] = (1.0 + ker(xx)[0]) + fx[:] = (1.0 + ker(x)) # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= np.cos(np.pi * xx[j] / 2) + fx[i] *= np.cos(np.pi * x[j] / 2) if i > 0: - fx[i] *= np.sin(np.pi * xx[self.o - 1 - i] / 2) + fx[i] *= np.sin(np.pi * x[self.o - 1 - i] / 2) return fx -class dtlz3(sim_func): +class dtlz3(__dtlz_base__): """ Class defining the DTLZ3 problem with offset minimizer. DTLZ3 has a concave Pareto front, given by the unit sphere in @@ -426,66 +320,43 @@ class dtlz3(sim_func): 1 "global" Pareto front. Contains 2 methods: - * ``__init__(des, sim)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ3 problem. """ - def __init__(self, des, num_obj=3, offset=0.5): - """ Constructor for DTLZ3 class. - - Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. - - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. - - offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.5. - - """ - - super().__init__(des) - self.o = num_obj - self.offset = offset - return - def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. 
+ x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) + # Initialize kernel function - ker = g1(self.n, self.o, self.offset) + ker = __g1__(self.n, self.o, self.offset) # Initialize output array fx = np.zeros(self.o) - fx[:] = (1.0 + ker(xx)[0]) + fx[:] = (1.0 + ker(x)) # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= np.cos(np.pi * xx[j] / 2) + fx[i] *= np.cos(np.pi * x[j] / 2) if i > 0: - fx[i] *= np.sin(np.pi * xx[self.o - 1 - i] / 2) + fx[i] *= np.sin(np.pi * x[self.o - 1 - i] / 2) return fx -class dtlz4(sim_func): +class dtlz4(__dtlz_base__): """ Class defining the DTLZ4 problem with offset minimizer. DTLZ4 has a concave Pareto front, given by the unit sphere in @@ -496,29 +367,26 @@ class dtlz4(sim_func): a uniform distribution of solutions. Contains 2 methods: - * ``__init__(des, sim)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ4 problem. """ - def __init__(self, des, num_obj=3, offset=0.5, alpha=100.0): - """ Constructor for DTLZ4 class. + def __init__(self, num_des, num_obj=3, offset=0.0, alpha=100.0): + """ Constructor for DTLZ7, with modified default offset. Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. + num_des (int): The number of design variables. - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. + num_obj (int, optional): The number of objectives. 
offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.5. + is x_i = offset for i = num_objectives, ..., num_des. + The default offset is 0.0. alpha (optional, float or int): The uniformity parameter used for controlling the uniformity of the distribution of solutions @@ -527,41 +395,37 @@ def __init__(self, des, num_obj=3, offset=0.5, alpha=100.0): """ - super().__init__(des) - self.o = num_obj - self.offset = offset + super().__init__(num_des=num_des, num_obj=num_obj, offset=offset) self.alpha = alpha return def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) # Initialize kernel function - ker = g2(self.n, self.o, self.offset) + ker = __g2__(self.n, self.o, self.offset) # Initialize output array fx = np.zeros(self.o) - fx[:] = (1.0 + ker(xx)[0]) + fx[:] = (1.0 + ker(x)) # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= np.cos(np.pi * xx[j] ** self.alpha / 2) + fx[i] *= np.cos(np.pi * x[j] ** self.alpha / 2) if i > 0: - fx[i] *= np.sin(np.pi * xx[self.o - 1 - i] ** self.alpha / 2) + fx[i] *= np.sin(np.pi * x[self.o - 1 - i] ** self.alpha / 2) return fx -class dtlz5(sim_func): +class dtlz5(__dtlz_base__): """ Class defining the DTLZ5 problem with offset minimizer. 
DTLZ5 has a lower-dimensional Pareto front embedded in the objective @@ -569,58 +433,34 @@ class dtlz5(sim_func): DTLZ5 has no "local" Pareto fronts, besides the true Pareto front. Contains 2 methods: - * ``__init__(des, sim)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ5 problem. """ - def __init__(self, des, num_obj=3, offset=0.5): - """ Constructor for DTLZ5 class. - - Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. - - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. - - offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.5. - - """ - - super().__init__(des) - self.o = num_obj - self.offset = offset - return - def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. 
""" - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) # Initialize kernel function - ker = g2(self.n, self.o, self.offset) + ker = __g2__(self.n, self.o, self.offset) # Calculate theta values theta = np.zeros(self.o - 1) - g2x = ker(xx) + g2x = ker(x) for i in range(self.o - 1): - theta[i] = np.pi * (1 + 2 * g2x * xx[i]) / (4 * (1 + g2x)) + theta[i] = np.pi * (1 + 2 * g2x * x[i]) / (4 * (1 + g2x)) # Initialize output array fx = np.zeros(self.o) fx[:] = (1.0 + g2x) @@ -633,7 +473,7 @@ def __call__(self, x): return fx -class dtlz6(sim_func): +class dtlz6(__dtlz_base__): """ Class defining the DTLZ6 problem with offset minimizer. DTLZ6 has a lower-dimensional Pareto front embedded in the objective @@ -642,58 +482,51 @@ class dtlz6(sim_func): improvement until the algorithm is very close to its solution set. Contains 2 methods: - * ``__init__(des, sim)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ6 problem. """ - def __init__(self, des, num_obj=3, offset=0.0): - """ Constructor for DTLZ6 class. + def __init__(self, num_des, num_obj=3, offset=0.0): + """ Constructor for DTLZ6, with modified default offset. Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. + num_des (int): The number of design variables. - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. + num_obj (int, optional): The number of objectives. offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.0. + is x_i = offset for i = num_objectives, ..., num_des. + The default offset is 0.0. 
""" - super().__init__(des) - self.o = num_obj - self.offset = offset + super().__init__(num_des=num_des, num_obj=num_obj, offset=offset) return def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. """ - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) # Initialize kernel function - ker = g3(self.n, self.o, self.offset) + ker = __g3__(self.n, self.o, self.offset) # Calculate theta values theta = np.zeros(self.o - 1) - g3x = ker(xx) + g3x = ker(x) for i in range(self.o - 1): - theta[i] = np.pi * (1 + 2 * g3x * xx[i]) / (4 * (1 + g3x)) + theta[i] = np.pi * (1 + 2 * g3x * x[i]) / (4 * (1 + g3x)) # Initialize output array fx = np.zeros(self.o) fx[:] = (1.0 + g3x) @@ -706,7 +539,7 @@ def __call__(self, x): return fx -class dtlz7(sim_func): +class dtlz7(__dtlz_base__): """ Class defining the DTLZ7 problem with offset minimizer. DTLZ7 has a discontinuous Pareto front, with solutions on the @@ -715,61 +548,53 @@ class dtlz7(sim_func): F_m = o - F_1 (1 + sin(3pi F_1)) - ... - F_{o-1} (1 + sin3pi F_{o-1}). Contains 2 methods: - * ``__init__(des, sim)`` + * ``__init__(num_des, num_obj)`` * ``__call__(x)`` - The ``__init__`` method inherits from the sim_func ABC. + The ``__init__`` method inherits from the __dtlz_base__ ABC. The ``__call__`` method performs an evaluation of the DTLZ7 problem. """ - def __init__(self, des, num_obj=3, offset=0.0): - """ Constructor for DTLZ7 class. + def __init__(self, num_des, num_obj=3, offset=0.0): + """ Constructor for DTLZ7, with modified default offset. 
Args: - des (np.dtype or int): Either the numpy.dtype of the - design variables or the number of design variables, - assumed to all be continuous and unnamed. + num_des (int): The number of design variables. - num_obj (int, optional): The number of objectives, which is - used as the number of simulation outputs. + num_obj (int, optional): The number of objectives. offset (optional, float): The location of the global minimizers - is x_i = offset for i = number of objectives, ..., number of - design variables. The default value is offset = 0.0. + is x_i = offset for i = num_objectives, ..., num_des. + The default offset is 0.0. """ - super().__init__(des) - self.o = num_obj - self.offset = offset + super().__init__(num_des=num_des, num_obj=num_obj, offset=offset) return def __call__(self, x): - """ Define simulation evaluation. + """ Define objective evaluation. Args: - x (numpy.array): A numpy.ndarray (unnamed) or numpy structured - array (named), containing the design point to evaluate. + x (numpy.ndarray): A numpy.ndarray containing the design point + to evaluate. Returns: - numpy.ndarray: The output of this simulation for the input x. + numpy float array: The output of this objective for the input x. 
""" - # Extract x into xx, if names are used - xx = unpack(x, self.des_type) # Initialize kernel function - ker = g4(self.n, self.o, self.offset) + ker = __g4__(self.n, self.o, self.offset) # Initialize first o-1 entries in the output array fx = np.zeros(self.o) - print(xx) - fx[:self.o-1] = xx[:self.o-1] + fx[:self.o-1] = x[:self.o-1] # Calculate kernel functions - gx = 1.0 + ker(xx) - hx = (-np.sum(xx[:self.o-1] * - (1.0 + np.sin(3.0 * np.pi * xx[:self.o-1]) / gx)) + gx = 1.0 + ker(x) + hx = (-np.sum(x[:self.o-1] * + (1.0 + np.sin(3.0 * np.pi * x[:self.o-1]) / gx)) + float(self.o)) # Calculate the last entry in the output array fx[self.o-1] = gx * hx diff --git a/lib/DTLZ/hpo.py b/lib/DTLZ/hpo.py index 533a8f7..77bda3e 100644 --- a/lib/DTLZ/hpo.py +++ b/lib/DTLZ/hpo.py @@ -4,25 +4,21 @@ import numpy as np from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob -from .dtlz import dtlz1, dtlz2, dtlz3, dtlz4, dtlz5, dtlz6, dtlz7 +from dtlz import dtlz2 as DTLZ +# Set problem dims (or read from ENV) nb_dim = os.environ.get("DEEPHYPER_BENCHMARK_NDIMS", 5) -domain = (-32.768, 32.768) +nb_obj = os.environ.get("DEEPHYPER_BENCHMARK_NOBJS", 2) +domain = (0., 1.) 
+soln_offset = 0.6 + +# Set up problem problem = HpProblem() +dtlz_obj = DTLZ(nb_dim, nb_obj, offset=soln_offset) for i in range(nb_dim): problem.add_hyperparameter(domain, f"x{i}") -def ackley(x, a=20, b=0.2, c=2 * np.pi): - d = len(x) - s1 = np.sum(x**2) - s2 = np.sum(np.cos(c * x)) - term1 = -a * np.exp(-b * np.sqrt(s1 / d)) - term2 = -np.exp(s2 / d) - y = term1 + term2 + a + np.exp(1) - return y - - @profile def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: @@ -35,8 +31,9 @@ def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: x = np.array([config[k] for k in config if "x" in k]) x = np.asarray_chkfinite(x) # ValueError if any NaN or Inf + ff = [fi for fi in dtlz_obj(x)] - return -ackley(x) + return ff if __name__ == "__main__": From 56411bc786ad326f94c187761146d89cdc7738ab Mon Sep 17 00:00:00 2001 From: Tyler Date: Sun, 19 Mar 2023 16:13:17 -0500 Subject: [PATCH 03/36] added extra environment vars to dtlz to read problem def --- lib/DTLZ/hpo.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/DTLZ/hpo.py b/lib/DTLZ/hpo.py index 77bda3e..d087eec 100644 --- a/lib/DTLZ/hpo.py +++ b/lib/DTLZ/hpo.py @@ -1,20 +1,26 @@ import os +from importlib import import_module import time import numpy as np from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob -from dtlz import dtlz2 as DTLZ +import dtlz -# Set problem dims (or read from ENV) +# Read DTLZ problem name and acquire pointer +dtlz_prob = os.environ.get("DEEPHYPER_BENCHMARK_DTLZ_PROB", 2) +dtlz_prob_name = f"dtlz{dtlz_prob}" +dtlz_class_ptr = getattr(dtlz, dtlz_prob_name) + +# Read problem dims and definition (or read from ENV) nb_dim = os.environ.get("DEEPHYPER_BENCHMARK_NDIMS", 5) nb_obj = os.environ.get("DEEPHYPER_BENCHMARK_NOBJS", 2) +soln_offset = os.environ.get("DEEPHYPER_BENCHMARK_DTLZ_OFFSET", 0.6) domain = (0., 1.) 
-soln_offset = 0.6 -# Set up problem +# Create problem problem = HpProblem() -dtlz_obj = DTLZ(nb_dim, nb_obj, offset=soln_offset) +dtlz_obj = dtlz_class_ptr(nb_dim, nb_obj, offset=soln_offset) for i in range(nb_dim): problem.add_hyperparameter(domain, f"x{i}") From da77c2731a6256a51e7a3bbd5b42bf8241138122 Mon Sep 17 00:00:00 2001 From: Tyler Date: Sun, 19 Mar 2023 16:38:53 -0500 Subject: [PATCH 04/36] added dtlz run to examples --- examples/bench_dtlz.py | 29 +++++++++++++++++++++++++++++ lib/DTLZ/hpo.py | 6 +++--- 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 examples/bench_dtlz.py diff --git a/examples/bench_dtlz.py b/examples/bench_dtlz.py new file mode 100644 index 0000000..dd9ef83 --- /dev/null +++ b/examples/bench_dtlz.py @@ -0,0 +1,29 @@ +# Setup info-level logging +import logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s - %(message)s", + force=True, +) + +# Set DTLZ problem environment variables +import os +os.environ["DEEPHYPER_BENCHMARK_NDIMS"] = "5" +os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" +os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" +os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" + +# Load DTLZ benchmark suite +import deephyper_benchmark as dhb +# dhb.install("DTLZ") +dhb.load("DTLZ") + +# Run HPO-pipeline with default configuration of hyperparameters +from deephyper_benchmark.lib.dtlz import hpo +from deephyper.evaluator import RunningJob +config = hpo.problem.default_configuration +print(config) +res = hpo.run(RunningJob(parameters=config)) +print(f"{res=}") + + diff --git a/lib/DTLZ/hpo.py b/lib/DTLZ/hpo.py index d087eec..85e0d13 100644 --- a/lib/DTLZ/hpo.py +++ b/lib/DTLZ/hpo.py @@ -13,9 +13,9 @@ dtlz_class_ptr = getattr(dtlz, dtlz_prob_name) # Read problem dims and definition (or read from ENV) -nb_dim = os.environ.get("DEEPHYPER_BENCHMARK_NDIMS", 5) -nb_obj = os.environ.get("DEEPHYPER_BENCHMARK_NOBJS", 2) -soln_offset = 
os.environ.get("DEEPHYPER_BENCHMARK_DTLZ_OFFSET", 0.6) +nb_dim = int(os.environ.get("DEEPHYPER_BENCHMARK_NDIMS", 5)) +nb_obj = int(os.environ.get("DEEPHYPER_BENCHMARK_NOBJS", 2)) +soln_offset = float(os.environ.get("DEEPHYPER_BENCHMARK_DTLZ_OFFSET", 0.6)) domain = (0., 1.) # Create problem From 9689394646c81f7870546067e3ab57eda4d92434 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 20 Mar 2023 13:05:54 -0500 Subject: [PATCH 05/36] modified bench_dtlz.py to run an optimization round --- examples/bench_dtlz.py | 46 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/examples/bench_dtlz.py b/examples/bench_dtlz.py index dd9ef83..bf9e9ce 100644 --- a/examples/bench_dtlz.py +++ b/examples/bench_dtlz.py @@ -2,28 +2,50 @@ import logging logging.basicConfig( level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s - %(message)s", + format="%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s - " + \ + "%(message)s", force=True, ) # Set DTLZ problem environment variables import os -os.environ["DEEPHYPER_BENCHMARK_NDIMS"] = "5" -os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" -os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" -os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" +os.environ["DEEPHYPER_BENCHMARK_NDIMS"] = "5" # 5 vars +os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" # 2 objs +os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem +os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" # [x_o, .., x_d]*=0.6 # Load DTLZ benchmark suite import deephyper_benchmark as dhb # dhb.install("DTLZ") dhb.load("DTLZ") -# Run HPO-pipeline with default configuration of hyperparameters -from deephyper_benchmark.lib.dtlz import hpo -from deephyper.evaluator import RunningJob -config = hpo.problem.default_configuration -print(config) -res = hpo.run(RunningJob(parameters=config)) -print(f"{res=}") +# Necessary IF statement otherwise it will enter in a infinite loop +# when loading the 
'run' function from a subprocess +if __name__ == "__main__": + from deephyper.problem import HpProblem + from deephyper.search.hps import CBO + # Run HPO-pipeline with default configuration of hyperparameters + from deephyper_benchmark.lib.dtlz import hpo + from deephyper.evaluator import RunningJob, Evaluator + config = hpo.problem.default_configuration + print(config) + res = hpo.run(RunningJob(parameters=config)) + print(f"{res=}") + + # define the evaluator to distribute the computation + evaluator = Evaluator.create( + hpo.run, + method="process", + method_kwargs={ + "num_workers": 2, + }, + ) + + # define your search and execute it + search = CBO(hpo.problem, evaluator) + + # solve with 100 evals + results = search.search(max_evals=100) + print(results) From 231e3fe94a3efe34fb65491ed09d86e93ccec306 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 20 Mar 2023 13:14:36 -0500 Subject: [PATCH 06/36] added DTLZ problems to README --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e82468..00dd965 100644 --- a/README.md +++ b/README.md @@ -66,4 +66,24 @@ from deephyper_benchmark.lib.benchmark_101.hpo import problem, run | LCu | Learning curve hyperparameter optimization benchmark. | | | | | | | PINNBench | Physics Informed Neural Networks Benchmark. | | | | | | | Toy | Toy examples for debugging. | | | | | | -| | | | | | | | \ No newline at end of file +| DTLZ | The modified DTLZ multiobjective test suite. 
| $\mathbb{R}$ | $\mathbb{R}$ | ✅ | ❌ | configurable | +| | | | | | | | + + + + + + + + + + + + + + + + + + + From 7dcfe0dbe4900dc156a7653968fa6b3af1ebc098 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 20 Mar 2023 13:32:59 -0500 Subject: [PATCH 07/36] updated dir structure to match template, and added README --- lib/DTLZ/README.md | 64 ++++++++++++++++++++++++++++++++++ lib/DTLZ/__init__.py | 1 + lib/DTLZ/hpo.py | 2 +- lib/DTLZ/{dtlz.py => model.py} | 0 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 lib/DTLZ/README.md rename lib/DTLZ/{dtlz.py => model.py} (100%) diff --git a/lib/DTLZ/README.md b/lib/DTLZ/README.md new file mode 100644 index 0000000..0f141c4 --- /dev/null +++ b/lib/DTLZ/README.md @@ -0,0 +1,64 @@ + +# Modified Multiobjective DTLZ Test Suite + +This module contains objective function implementations of the DTLZ test +suite, derived from the implementations in +[ParMOO](https://github.com/parmoo/parmoo). + +------------------------------------------------------------------------------ + +For further references, the DTLZ test suite was originally proposed in: + + Deb, Thiele, Laumanns, and Zitzler. "Scalable test problems for + evolutionary multiobjective optimization" in Evolutionary Multiobjective + Optimization, Theoretical Advances and Applications, Ch. 6 (pp. 105--145). + Springer-Verlag, London, UK, 2005. Abraham, Jain, and Goldberg (Eds). + +The original implementation was appropriate for testing randomized algorithms, +but for many deterministic algorithms, the global solutions represent either +best- or worst-case scenarios, so an configurable offset was introduced in: + + Chang. "Mathematical Software for Multiobjective Optimization Problems." + Ph.D. dissertation, Virginia Tech, Dept. of Computer Science, 2020. 
+ +------------------------------------------------------------------------------ + +The full list of public classes in this module includes the 7 unconstrained +DTLZ problems + * ``dtlz1``, + * ``dtlz2``, + * ``dtlz3``, + * ``dtlz4``, + * ``dtlz5``, + * ``dtlz6``, + * ``dtlz7``, + * ``dtlz8``, and + * ``dtlz9`` + +which are selected by setting the environment variable +``DEEPHYPER_BENCHMARK_DTLZ_PROB``. + +## Usage + +To use the benchmark follow this example set of instructions: + +```python + +# Set DTLZ problem environment variables before loading +import os +os.environ["DEEPHYPER_BENCHMARK_NDIMS"] = "5" # 5 vars +os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" # 2 objs +os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem +os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" # soln [x_o, .., x_n]=0.6 + +# Load & install DTLZ benchmark suite +import deephyper_benchmark as dhb +dhb.install("DTLZ") +dhb.load("DTLZ") + +# Example of running one evaluation of DTLZ problem +from deephyper.evaluator import RunningJob +config = dtlz.hpo.problem.default_configuration # get a default config to test +res = dtlz.hpo.run(RunningJob(parameters=config)) + +``` diff --git a/lib/DTLZ/__init__.py b/lib/DTLZ/__init__.py index e69de29..f102a9c 100644 --- a/lib/DTLZ/__init__.py +++ b/lib/DTLZ/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/lib/DTLZ/hpo.py b/lib/DTLZ/hpo.py index 85e0d13..12c1f60 100644 --- a/lib/DTLZ/hpo.py +++ b/lib/DTLZ/hpo.py @@ -5,7 +5,7 @@ import numpy as np from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob -import dtlz +from . 
import model as dtlz # Read DTLZ problem name and acquire pointer dtlz_prob = os.environ.get("DEEPHYPER_BENCHMARK_DTLZ_PROB", 2) diff --git a/lib/DTLZ/dtlz.py b/lib/DTLZ/model.py similarity index 100% rename from lib/DTLZ/dtlz.py rename to lib/DTLZ/model.py From 5de6e89aef9c3db0b616ffdf88279b172ee4f2fe Mon Sep 17 00:00:00 2001 From: Tyler Date: Wed, 22 Mar 2023 14:15:04 -0500 Subject: [PATCH 08/36] @Deathn0t comments imlemented --- examples/bench_dtlz.py | 3 +-- lib/C-BBO/ackley/hpo.py | 3 ++- lib/DTLZ/README.md | 3 +-- lib/DTLZ/benchmark.py | 6 +----- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/examples/bench_dtlz.py b/examples/bench_dtlz.py index bf9e9ce..24a527a 100644 --- a/examples/bench_dtlz.py +++ b/examples/bench_dtlz.py @@ -14,9 +14,8 @@ os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" # [x_o, .., x_d]*=0.6 -# Load DTLZ benchmark suite +# Load DTLZ benchmark suite, nothing to install import deephyper_benchmark as dhb -# dhb.install("DTLZ") dhb.load("DTLZ") diff --git a/lib/C-BBO/ackley/hpo.py b/lib/C-BBO/ackley/hpo.py index d2957a3..3cc0dcf 100644 --- a/lib/C-BBO/ackley/hpo.py +++ b/lib/C-BBO/ackley/hpo.py @@ -1,7 +1,8 @@ import os - import time + import numpy as np + from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob diff --git a/lib/DTLZ/README.md b/lib/DTLZ/README.md index 0f141c4..125363c 100644 --- a/lib/DTLZ/README.md +++ b/lib/DTLZ/README.md @@ -51,9 +51,8 @@ os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" # 2 objs os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" # soln [x_o, .., x_n]=0.6 -# Load & install DTLZ benchmark suite +# Load DTLZ benchmark suite import deephyper_benchmark as dhb -dhb.install("DTLZ") dhb.load("DTLZ") # Example of running one evaluation of DTLZ problem diff --git a/lib/DTLZ/benchmark.py b/lib/DTLZ/benchmark.py index 
1c1a399..2c12251 100644 --- a/lib/DTLZ/benchmark.py +++ b/lib/DTLZ/benchmark.py @@ -1,10 +1,6 @@ -import os - from deephyper_benchmark import * -DIR = os.path.dirname(os.path.abspath(__file__)) - -class DTLZ_lib(Benchmark): +class DTLZBenchmark(Benchmark): version = "0.0.1" From 1e24dcb9403988832647243bd9d2438aac9c04db Mon Sep 17 00:00:00 2001 From: Tyler Date: Tue, 4 Apr 2023 20:05:56 -0500 Subject: [PATCH 09/36] added a module to DTLZ for computing performance metrics --- lib/DTLZ/metrics.py | 189 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 lib/DTLZ/metrics.py diff --git a/lib/DTLZ/metrics.py b/lib/DTLZ/metrics.py new file mode 100644 index 0000000..aefc0c5 --- /dev/null +++ b/lib/DTLZ/metrics.py @@ -0,0 +1,189 @@ +import os +import numpy as np +from deephyper.skopt.moo import pareto_front, hypervolume + + +class PerformanceEvaluator: + """ A class defining performance evaluators for the DTLZ problems. + + Contains the following public methods: + + * `__init__()` constructs a new instance by reading the problem defn + from environment variables, + * `hypervolume(pts)` calculates the total hypervolume dominated by + the current solution, using the Nadir point as the reference point + and filtering out solutions that do not dominate the Nadir point, + * `nadirPt()` calculates the Nadir point for the current problem, + * `numPts(pts)` calculates the number of solution points that dominate + the Nadir point, and + * `rmse(pts)` calculates the RMSE where the error in each point is + approximated by the 2-norm distance to the nearest solution point. + + """ + + def __init__(self): + """ Read the current DTLZ problem defn from environment vars. """ + + self.p_num = os.environ.get("DEEPHYPER_BENCHMARK_DTLZ_PROB", "2") + self.nobjs = int(os.environ.get("DEEPHYPER_BENCHMARK_NOBJS", 2)) + + def hypervolume(self, pts): + """ Calculate the hypervolume dominated by soln, wrt the Nadir point. 
+ + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + float: The total hypervolume dominated by the current solution, + filtering out points worse than the Nadir point and using the + Nadir point as the reference. + + """ + + filtered_pts = pts.copy() + nadir = self.nadirPt() + for i in range(pts.shape[0]): + if np.any(filtered_pts[i, :] > nadir): + filtered_pts[i, :] = nadir + return hypervolume(filtered_pts, nadir) + + def nadirPt(self): + """ Calculate the Nadir point for the given problem definition. """ + + if self.p_num == "1": + return np.ones(self.nobjs) * 0.5 + elif self.p_num in ["2", "3", "4", "5", "6"]: + return np.ones(self.nobjs) + elif self.p_num == "7": + nadir = np.ones(self.nobjs) + nadir[self.nobjs - 1] = self.nobjs * 2.0 + return nadir + else: + raise ValueError(f"DTLZ{self.p_num} is not a valid problem") + + def numPts(self, pts): + """ Calculate the number of solutions that dominate the Nadir point. + + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + int: The number of fi in pts such that all(fi < self.nadirPt). + + """ + + pareto_pts = pareto_front(pts) + return sum([all(fi <= self.nadirPt()) for fi in pareto_pts]) + + def rmse(self, pts): + """ Calculate the RMSE for a set of objective points. + + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + float: The RMSE over all points in pts. 
+ + """ + + pareto_pts = pareto_front(pts) + if self.p_num == "1": + dists = self._dtlz1Dist(pareto_pts) + elif self.p_num in ["2", "3", "4", "5", "6"]: + dists = self._dtlz2Dist(pareto_pts) + elif self.p_num == "7": + dists = self._dtlz7Dist(pareto_pts) + else: + raise ValueError(f"DTLZ{self.p_num} is not a valid problem") + return np.sqrt(np.sum(dists ** 2) / len(dists)) + + def _dtlz1Dist(self, pts): + """ Calculate the distance from each fi to the nearest soln in DTLZ1. + + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + numpy.ndarray: A 1d array of distances to the nearest solution + point for DTLZ1. + + + """ + + return np.array([np.linalg.norm(0.5 * fi / np.sum(fi) - fi) + for fi in pts]) + + def _dtlz2Dist(self, pts): + """ Calculate the distance from each fi to the unit sphere. + + Note: Works for DTLZ2-6 + + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + numpy.ndarray: A 1d array of distances to the surface of the + unit sphere. + + """ + + return np.array([np.linalg.norm(fi / np.linalg.norm(fi) - fi) + for fi in pts]) + + def _dtlz7Dist(self, pts): + """ Calculate the distance from each fi to the nearest soln in DTLZ7. + + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + numpy.ndarray: A 1d array of distances to the nearest solution + point to DTLZ7. + + """ + + # Project each point onto DTLZ7 solution and calculate difference + pts_proj = [] + for fi in pts: + gx = 1.0 + hx = float(self.nobjs) + for j in range(self.nobjs-1): + hx = hx - ((fi[j] / (1.0 + gx)) * (1.0 + np.sin(3.0 * np.pi + * fi[j]))) + pts_proj.append((1.0 + gx) * hx) + return np.array([np.abs(fi[-1] - fj) for fi, fj in zip(pts, pts_proj)]) + + +if __name__ == "__main__": + """ Driver code to test performance metrics. 
""" + + os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "1" # DTLZ1 problem + dtlz1_eval = PerformanceEvaluator() + s1 = np.array([[0.5, 0], [0, 0.5], [.25, .25], [0.2, 0.8]]) + os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem + dtlz2_eval = PerformanceEvaluator() + s2 = np.array([[1, 0], [0, 1], [1/np.sqrt(2), 1/np.sqrt(2)], [0.25, 2]]) + os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "7" # DTLZ7 problem + dtlz7_eval = PerformanceEvaluator() + s7 = np.array([[0, 4], [1, 3], [.5, 4], [0.5, 6]]) + + assert abs(dtlz1_eval.hypervolume(s1) - .0625) < 1.0e-8 + assert np.all(np.abs(dtlz1_eval.nadirPt() - 0.5) < 1.0e-8) + assert dtlz1_eval.numPts(s1) == 3 + assert abs(dtlz1_eval.rmse(s1)) < 1.0e-8 + + assert abs(dtlz2_eval.hypervolume(s2) - (1.5 - np.sqrt(2))) < 1.0e-8 + assert np.all(np.abs(dtlz2_eval.nadirPt() - 1) < 1.0e-8) + assert dtlz2_eval.numPts(s2) == 3 + assert abs(dtlz2_eval.rmse(s2)) < 1.0e-8 + + assert abs(dtlz7_eval.hypervolume(s7)) < 1.0e-8 + assert np.all(np.abs(dtlz7_eval.nadirPt() - np.array([1, 4])) < 1.0e-8) + assert dtlz7_eval.numPts(s7) == 2 + assert abs(dtlz7_eval.rmse(s7)) < 1.0e-8 From 325b70e83e41abba438d37936f481ce7afbc1204 Mon Sep 17 00:00:00 2001 From: Tyler Date: Tue, 18 Apr 2023 12:38:37 -0500 Subject: [PATCH 10/36] updated dtlz probs for minimization --- lib/DTLZ/hpo.py | 2 +- lib/DTLZ/metrics.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/DTLZ/hpo.py b/lib/DTLZ/hpo.py index 12c1f60..76aa7d7 100644 --- a/lib/DTLZ/hpo.py +++ b/lib/DTLZ/hpo.py @@ -37,7 +37,7 @@ def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: x = np.array([config[k] for k in config if "x" in k]) x = np.asarray_chkfinite(x) # ValueError if any NaN or Inf - ff = [fi for fi in dtlz_obj(x)] + ff = [-fi for fi in dtlz_obj(x)] return ff diff --git a/lib/DTLZ/metrics.py b/lib/DTLZ/metrics.py index aefc0c5..6bf6e85 100644 --- a/lib/DTLZ/metrics.py +++ b/lib/DTLZ/metrics.py @@ -41,7 +41,10 @@ def 
hypervolume(self, pts): """ - filtered_pts = pts.copy() + if np.any(pts < 0): + filtered_pts = -pts.copy() + else: + filtered_pts = pts.copy() nadir = self.nadirPt() for i in range(pts.shape[0]): if np.any(filtered_pts[i, :] > nadir): @@ -74,7 +77,10 @@ def numPts(self, pts): """ - pareto_pts = pareto_front(pts) + if np.any(pts < 0): + pareto_pts = pareto_front(-pts) + else: + pareto_pts = pareto_front(pts) return sum([all(fi <= self.nadirPt()) for fi in pareto_pts]) def rmse(self, pts): @@ -89,7 +95,10 @@ def rmse(self, pts): """ - pareto_pts = pareto_front(pts) + if np.any(pts < 0): + pareto_pts = pareto_front(-pts) + else: + pareto_pts = pareto_front(pts) if self.p_num == "1": dists = self._dtlz1Dist(pareto_pts) elif self.p_num in ["2", "3", "4", "5", "6"]: From 4ca4e0e5fd800a4bca689eaaa6636e58d29cbf8f Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 27 Apr 2023 18:33:24 -0500 Subject: [PATCH 11/36] added details on metrics to README --- lib/DTLZ/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/lib/DTLZ/README.md b/lib/DTLZ/README.md index 125363c..2621b19 100644 --- a/lib/DTLZ/README.md +++ b/lib/DTLZ/README.md @@ -21,6 +21,14 @@ best- or worst-case scenarios, so an configurable offset was introduced in: Chang. "Mathematical Software for Multiobjective Optimization Problems." Ph.D. dissertation, Virginia Tech, Dept. of Computer Science, 2020. +Note that the DTLZ problems are minimization problems. Since DeepHyper +maximizes, the implementation herein returns the negative value for each of +the DTLZ objectives. + +Our performance evaluator ``metrics`` scripts can evaluate either the +positive or negative solutions to estimate how well we have solved the +problem. 
+ ------------------------------------------------------------------------------ The full list of public classes in this module includes the 7 unconstrained @@ -61,3 +69,37 @@ config = dtlz.hpo.problem.default_configuration # get a default config to test res = dtlz.hpo.run(RunningJob(parameters=config)) ``` + +## Evaluating Results + +Evaluating the performance of a multiobjective solver is nontrivial. +Typically, one should evaluate on two orthogonal bases: + 1. Quality of solutions -- What is the (average) error in the solutions + returned by the solver? + 2. Diversity of solutions -- How much of the true Pareto front is covered + by these solutions? + +To evaluate these two metrics, we use: + 1. RMSE: Let $F_i$ be a point in the solution set returned by a solver, + and let $Y_i$ be the nearest point to $F_i$ on the true Pareto front, + for $i=1,\ldots, n$. + Then the RMSE is $\sqrt{\sum_{i=1}^n (F_i - Y_i)^2 / n}$. + 2. Hypervolume dominated: Let $F_i$ be defined as above, and let $R$ be + a pre-determined reference point such that all $F_i$ dominate $R$. + Then the hypervolume is given by the volume of the union of all + hyperboxes $B_i$ whose largest vertex is $F_i$ and smallest vertex + is $R$. The value (and usefulness) of the hypervolume metric is extremely + sensitive to the choice of $R$. Therefore, for this problem, we choose + $R$ to be the Nadir point for the true Pareto front. *Note that in order + to use the Nadir point as the reference point, we must throw out every + solution returned by the solver that is worse than the Nadir point. For + extremely difficult problems, this can result in zero hypervolume.* + +For a general problem, the two metrics listed above could be very difficult +to compute and many researchers will use the hypervolume with an overly +pessimistic reference point as a proxy for both quality and diversity. +However, in general, the hypervolume tends to promote diversity over quality. 
+For the DTLZ problems, since the shape of the true Pareto front is known, +we can calculate each of these metrics, and both the ``rmse(results)`` and +``hypervolume(results)`` functions are implemented in the ``dtlz.metrics`` +module. From 7de31564175972a3059bb42f4aace8358a7b45ec Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 27 Apr 2023 18:35:49 -0500 Subject: [PATCH 12/36] updated README --- lib/DTLZ/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/DTLZ/README.md b/lib/DTLZ/README.md index 2621b19..382f08c 100644 --- a/lib/DTLZ/README.md +++ b/lib/DTLZ/README.md @@ -83,7 +83,7 @@ To evaluate these two metrics, we use: 1. RMSE: Let $F_i$ be a point in the solution set returned by a solver, and let $Y_i$ be the nearest point to $F_i$ on the true Pareto front, for $i=1,\ldots, n$. - Then the RMSE is $\sqrt{\sum_{i=1}^n (F_i - Y_i)^2 / n}$. + Then the RMSE is $\sqrt{\sum_{i=1,...,n} (F_i - Y_i)^2 / n}$. 2. Hypervolume dominated: Let $F_i$ be defined as above, and let $R$ be a pre-determined reference point such that all $F_i$ dominate $R$. Then the hypervolume is given by the volume of the union of all From 84ef28199e4365605547e81ff065d0c41b8e7b4e Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 27 Apr 2023 18:36:26 -0500 Subject: [PATCH 13/36] updated README --- lib/DTLZ/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/DTLZ/README.md b/lib/DTLZ/README.md index 382f08c..3cb6d6c 100644 --- a/lib/DTLZ/README.md +++ b/lib/DTLZ/README.md @@ -83,7 +83,7 @@ To evaluate these two metrics, we use: 1. RMSE: Let $F_i$ be a point in the solution set returned by a solver, and let $Y_i$ be the nearest point to $F_i$ on the true Pareto front, for $i=1,\ldots, n$. - Then the RMSE is $\sqrt{\sum_{i=1,...,n} (F_i - Y_i)^2 / n}$. + Then the RMSE is $\sqrt{\sum_{i} (F_i - Y_i)^2 / n}$. 2. Hypervolume dominated: Let $F_i$ be defined as above, and let $R$ be a pre-determined reference point such that all $F_i$ dominate $R$. 
Then the hypervolume is given by the volume of the union of all From ddb2aa1d7735e6e91a65a543605fbfaec37a008f Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 27 Apr 2023 18:37:43 -0500 Subject: [PATCH 14/36] updated README --- lib/DTLZ/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/DTLZ/README.md b/lib/DTLZ/README.md index 3cb6d6c..47cac14 100644 --- a/lib/DTLZ/README.md +++ b/lib/DTLZ/README.md @@ -90,10 +90,12 @@ To evaluate these two metrics, we use: hyperboxes $B_i$ whose largest vertex is $F_i$ and smallest vertex is $R$. The value (and usefulness) of the hypervolume metric is extremely sensitive to the choice of $R$. Therefore, for this problem, we choose - $R$ to be the Nadir point for the true Pareto front. *Note that in order + $R$ to be the Nadir point for the true Pareto front. **Note that in order to use the Nadir point as the reference point, we must throw out every solution returned by the solver that is worse than the Nadir point. For - extremely difficult problems, this can result in zero hypervolume.* + extremely difficult problems, this can result in zero hypervolume if no + solutions better than the Nadir point were found. 
This is most common + for DTLZ1, DTLZ3, and DTLZ7.** For a general problem, the two metrics listed above could be very difficult to compute and many researchers will use the hypervolume with an overly From 690d4386142cd91a84394533e65ba40a17ba7bd0 Mon Sep 17 00:00:00 2001 From: Tyler Date: Fri, 5 May 2023 15:47:54 -0500 Subject: [PATCH 15/36] starting on deephyper jahs bench --- lib/JAHSBench/__init__.py | 1 + lib/JAHSBench/benchmark.py | 6 ++++ lib/JAHSBench/hpo.py | 35 ++++++++++++++++++++ lib/JAHSBench/model.py | 66 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 lib/JAHSBench/__init__.py create mode 100644 lib/JAHSBench/benchmark.py create mode 100644 lib/JAHSBench/hpo.py create mode 100644 lib/JAHSBench/model.py diff --git a/lib/JAHSBench/__init__.py b/lib/JAHSBench/__init__.py new file mode 100644 index 0000000..f102a9c --- /dev/null +++ b/lib/JAHSBench/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/lib/JAHSBench/benchmark.py b/lib/JAHSBench/benchmark.py new file mode 100644 index 0000000..e64b2a9 --- /dev/null +++ b/lib/JAHSBench/benchmark.py @@ -0,0 +1,6 @@ +from deephyper_benchmark import * + + +class JAHS201Benchmark(Benchmark): + + version = "0.0.1" diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py new file mode 100644 index 0000000..9293f7f --- /dev/null +++ b/lib/JAHSBench/hpo.py @@ -0,0 +1,35 @@ +import time +import numpy as np +from deephyper.problem import HpProblem +from deephyper.evaluator import profile, RunningJob +from . 
import model + + +# Create problem +problem = HpProblem() +jahs_obj = model.jahs_bench() + + +@profile +def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: + + config = job.parameters + + if sleep: + t_sleep = np.random.normal(loc=sleep_mean, scale=sleep_noise) + t_sleep = max(t_sleep, 0) + time.sleep(t_sleep) + + x = np.array([config[k] for k in config if "x" in k]) + x = np.asarray_chkfinite(x) # ValueError if any NaN or Inf + f1, f2 = jahs_obj(x) + + return f1, -f2 + + +if __name__ == "__main__": + print(problem) + default_config = problem.default_configuration + print(f"{default_config=}") + result = run(RunningJob(parameters=default_config)) + print(f"{result=}") diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py new file mode 100644 index 0000000..e4ddd7a --- /dev/null +++ b/lib/JAHSBench/model.py @@ -0,0 +1,66 @@ +""" This module contains objective function implementations of the JAHS 201 +benchmark suite, implemented as DeepHyper compatible models. + +""" + +class jahs_bench: + """ A callable class implementing the JAHS benchmark problems. """ + + def __init__(self, nepochs=200, dataset="cifar10"): + """ Import and configure the jahs-bench module. + + Args: + nepochs (int, optional): Number of training epochs to use, + defaults to 200. + + dataset (str): One of "cifar10" (default), "colorectal_history", + or "fashion_mnist" + + """ + + from jahs_bench.api import Benchmark + + ### JAHS bench settings ### + self.nepochs = nepochs + MODEL_PATH = "." + # Define the benchmark + self.benchmark = Benchmark( + task=dataset, + save_dir=MODEL_PATH, + kind="surrogate", + download=True + ) + + def __call__(self, x): + """ DeepHyper compatible objective function calling jahs-bench. + + Args: + x (dict): Configuration dictionary with same keys as jahs-bench. + + Returns: + tuple of floats: In order: accuracy (to be maximized), latency + (to be minimized). 
+ + """ + + # Default config + config = { + 'Optimizer': 'SGD', + 'N': 5, + 'W': 16, + 'Resolution': 1.0, + } + # Update config using x + for key in x.keys(): + config[key] = x[key] + # Special rule for setting "TrivialAugment" + if x['TrivialAugment'] == "on": + config['TrivialAugment'] = True + else: + config['TrivialAugment'] = False + # Evaluate and return + fx = np.zeros(2) + result = self.benchmark(config, nepochs=NEPOCHS) + fx[0] = result[self.nepochs]['valid-acc'] + fx[1] = result[self.nepochs]['latency'] + return fx[0], fx[1] From 01875c61444ae9950c87e20401bd747b197ac7fe Mon Sep 17 00:00:00 2001 From: Tyler Date: Tue, 9 May 2023 13:54:25 -0500 Subject: [PATCH 16/36] fixing issues in jahs bench --- lib/JAHSBench/README.md | 108 +++++++++++++++++++++++++++++++++++++++- lib/JAHSBench/hpo.py | 19 ++++++- lib/JAHSBench/model.py | 2 +- 3 files changed, 124 insertions(+), 5 deletions(-) diff --git a/lib/JAHSBench/README.md b/lib/JAHSBench/README.md index 2574284..47cac14 100644 --- a/lib/JAHSBench/README.md +++ b/lib/JAHSBench/README.md @@ -1,3 +1,107 @@ -# JAHS-Bench-201 -* [JAHS-Bench-201 - Github](https://github.com/automl/jahs_bench_201) \ No newline at end of file +# Modified Multiobjective DTLZ Test Suite + +This module contains objective function implementations of the DTLZ test +suite, derived from the implementations in +[ParMOO](https://github.com/parmoo/parmoo). + +------------------------------------------------------------------------------ + +For further references, the DTLZ test suite was originally proposed in: + + Deb, Thiele, Laumanns, and Zitzler. "Scalable test problems for + evolutionary multiobjective optimization" in Evolutionary Multiobjective + Optimization, Theoretical Advances and Applications, Ch. 6 (pp. 105--145). + Springer-Verlag, London, UK, 2005. Abraham, Jain, and Goldberg (Eds). 
+ +The original implementation was appropriate for testing randomized algorithms, +but for many deterministic algorithms, the global solutions represent either +best- or worst-case scenarios, so an configurable offset was introduced in: + + Chang. "Mathematical Software for Multiobjective Optimization Problems." + Ph.D. dissertation, Virginia Tech, Dept. of Computer Science, 2020. + +Note that the DTLZ problems are minimization problems. Since DeepHyper +maximizes, the implementation herein returns the negative value for each of +the DTLZ objectives. + +Our performance evaluator ``metrics`` scripts can evaluate either the +positive or negative solutions to estimate how well we have solved the +problem. + +------------------------------------------------------------------------------ + +The full list of public classes in this module includes the 7 unconstrained +DTLZ problems + * ``dtlz1``, + * ``dtlz2``, + * ``dtlz3``, + * ``dtlz4``, + * ``dtlz5``, + * ``dtlz6``, + * ``dtlz7``, + * ``dtlz8``, and + * ``dtlz9`` + +which are selected by setting the environment variable +``DEEPHYPER_BENCHMARK_DTLZ_PROB``. + +## Usage + +To use the benchmark follow this example set of instructions: + +```python + +# Set DTLZ problem environment variables before loading +import os +os.environ["DEEPHYPER_BENCHMARK_NDIMS"] = "5" # 5 vars +os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" # 2 objs +os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem +os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" # soln [x_o, .., x_n]=0.6 + +# Load DTLZ benchmark suite +import deephyper_benchmark as dhb +dhb.load("DTLZ") + +# Example of running one evaluation of DTLZ problem +from deephyper.evaluator import RunningJob +config = dtlz.hpo.problem.default_configuration # get a default config to test +res = dtlz.hpo.run(RunningJob(parameters=config)) + +``` + +## Evaluating Results + +Evaluating the performance of a multiobjective solver is nontrivial. 
+Typically, one should evaluate on two orthogonal bases: + 1. Quality of solutions -- What is the (average) error in the solutions + returned by the solver? + 2. Diversity of solutions -- How much of the true Pareto front is covered + by these solutions? + +To evaluate these two metrics, we use: + 1. RMSE: Let $F_i$ be a point in the solution set returned by a solver, + and let $Y_i$ be the nearest point to $F_i$ on the true Pareto front, + for $i=1,\ldots, n$. + Then the RMSE is $\sqrt{\sum_{i} (F_i - Y_i)^2 / n}$. + 2. Hypervolume dominated: Let $F_i$ be defined as above, and let $R$ be + a pre-determined reference point such that all $F_i$ dominate $R$. + Then the hypervolume is given by the volume of the union of all + hyperboxes $B_i$ whose largest vertex is $F_i$ and smallest vertex + is $R$. The value (and usefulness) of the hypervolume metric is extremely + sensitive to the choice of $R$. Therefore, for this problem, we choose + $R$ to be the Nadir point for the true Pareto front. **Note that in order + to use the Nadir point as the reference point, we must throw out every + solution returned by the solver that is worse than the Nadir point. For + extremely difficult problems, this can result in zero hypervolume if no + solutions better than the Nadir point were found. This is most common + for DTLZ1, DTLZ3, and DTLZ7.** + +For a general problem, the two metrics listed above could be very difficult +to compute and many researchers will use the hypervolume with an overly +pessimistic reference point as a proxy for both quality and diversity. +However, in general, the hypervolume tends to promote diversity over quality. +For the DTLZ problems, since the shape of the true Pareto front is known, +we can calculate each of these metrics, and both the ``rmse(results)`` and +``hypervolume(results)`` functions are implemented in the ``dtlz.metrics`` +module. 
diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 9293f7f..9eec173 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -1,14 +1,29 @@ import time import numpy as np -from deephyper.problem import HpProblem +from deephyper.problem import NaProblem from deephyper.evaluator import profile, RunningJob from . import model # Create problem -problem = HpProblem() +problem = NaProblem() jahs_obj = model.jahs_bench() +problem.hyperparameter( + LearningRate=(1.0e-3, 1.0), "LearningRate") +problem.add_hyperparameter((1.0e-5, 1.0e-3), "WeightDecay") +# 2 categorical variables +moop_rbf.addDesign({'name': "Activation", + 'des_type': "categorical", + 'levels': ["ReLU", "Hardswish", "Mish"]}) +moop_rbf.addDesign({'name': "TrivialAugment", + 'des_type': "categorical", + 'levels': ["on", "off"]}) +# 6 integer variables +for i in range(1, 7): + moop_rbf.addDesign({'name': f"Op{i}", + 'des_type': "integer", + 'lb': 0, 'ub': 4}) @profile def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py index e4ddd7a..7eaf492 100644 --- a/lib/JAHSBench/model.py +++ b/lib/JAHSBench/model.py @@ -60,7 +60,7 @@ def __call__(self, x): config['TrivialAugment'] = False # Evaluate and return fx = np.zeros(2) - result = self.benchmark(config, nepochs=NEPOCHS) + result = self.benchmark(config, nepochs=self.nepochs) fx[0] = result[self.nepochs]['valid-acc'] fx[1] = result[self.nepochs]['latency'] return fx[0], fx[1] From 1db221b808aee12a1d5639a5382b7580947e6078 Mon Sep 17 00:00:00 2001 From: Tyler Date: Tue, 9 May 2023 17:06:42 -0500 Subject: [PATCH 17/36] updated jahs hpo defn --- lib/JAHSBench/hpo.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 9eec173..91f5f04 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -1,29 +1,22 @@ import time import numpy as np -from deephyper.problem import 
NaProblem +from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob from . import model # Create problem -problem = NaProblem() +problem = HpProblem() jahs_obj = model.jahs_bench() - -problem.hyperparameter( - LearningRate=(1.0e-3, 1.0), "LearningRate") +# 2 continuous hyperparameters +problem.add_hyperparameter((1.0e-3, 1.0), "LearningRate") problem.add_hyperparameter((1.0e-5, 1.0e-3), "WeightDecay") -# 2 categorical variables -moop_rbf.addDesign({'name': "Activation", - 'des_type': "categorical", - 'levels': ["ReLU", "Hardswish", "Mish"]}) -moop_rbf.addDesign({'name': "TrivialAugment", - 'des_type': "categorical", - 'levels': ["on", "off"]}) -# 6 integer variables +# 2 categorical hyperparameters +problem.add_hyperparameter(["ReLU", "Hardswish", "Mish"], "Activation") +problem.add_hyperparameter(["on", "off"], "TrivialAugment") +# 6 categorical architecture design variables for i in range(1, 7): - moop_rbf.addDesign({'name': f"Op{i}", - 'des_type': "integer", - 'lb': 0, 'ub': 4}) + problem.add_hyperparameter([0, 1, 2, 3, 4], f"Op{i}") @profile def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: From 4a32c5f9dc1a8504df3f8a9b98ebde60f2c93290 Mon Sep 17 00:00:00 2001 From: Tyler Chang Date: Mon, 15 May 2023 12:53:32 -0500 Subject: [PATCH 18/36] fixed minor bugs, updated README --- lib/JAHSBench/README.md | 69 ++++++++++++----------------------------- lib/JAHSBench/hpo.py | 5 +-- lib/JAHSBench/model.py | 2 ++ 3 files changed, 24 insertions(+), 52 deletions(-) diff --git a/lib/JAHSBench/README.md b/lib/JAHSBench/README.md index 47cac14..21b82e0 100644 --- a/lib/JAHSBench/README.md +++ b/lib/JAHSBench/README.md @@ -1,50 +1,26 @@ -# Modified Multiobjective DTLZ Test Suite +# JAHS Benchmark Suite -This module contains objective function implementations of the DTLZ test -suite, derived from the implementations in -[ParMOO](https://github.com/parmoo/parmoo). 
+This module contains a DeepHyper wrapper for + [JAHS-Bench-201](https://github.com/automl/jahs_bench_201). ------------------------------------------------------------------------------ -For further references, the DTLZ test suite was originally proposed in: +For further information, see: - Deb, Thiele, Laumanns, and Zitzler. "Scalable test problems for - evolutionary multiobjective optimization" in Evolutionary Multiobjective - Optimization, Theoretical Advances and Applications, Ch. 6 (pp. 105--145). - Springer-Verlag, London, UK, 2005. Abraham, Jain, and Goldberg (Eds). - -The original implementation was appropriate for testing randomized algorithms, -but for many deterministic algorithms, the global solutions represent either -best- or worst-case scenarios, so an configurable offset was introduced in: - - Chang. "Mathematical Software for Multiobjective Optimization Problems." - Ph.D. dissertation, Virginia Tech, Dept. of Computer Science, 2020. - -Note that the DTLZ problems are minimization problems. Since DeepHyper -maximizes, the implementation herein returns the negative value for each of -the DTLZ objectives. - -Our performance evaluator ``metrics`` scripts can evaluate either the -positive or negative solutions to estimate how well we have solved the -problem. - ------------------------------------------------------------------------------- - -The full list of public classes in this module includes the 7 unconstrained -DTLZ problems - * ``dtlz1``, - * ``dtlz2``, - * ``dtlz3``, - * ``dtlz4``, - * ``dtlz5``, - * ``dtlz6``, - * ``dtlz7``, - * ``dtlz8``, and - * ``dtlz9`` - -which are selected by setting the environment variable -``DEEPHYPER_BENCHMARK_DTLZ_PROB``. +``` + @inproceedings{NEURIPS2022_fd78f2f6, + author = {Bansal, Archit and Stoll, Danny and Janowski, Maciej and Zela, Arber and Hutter, Frank}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. 
Oh}, + pages = {38788--38802}, + publisher = {Curran Associates, Inc.}, + title = {JAHS-Bench-201: A Foundation For Research On Joint Architecture And Hyperparameter Search}, + url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/fd78f2f65881c1c7ce47e26b040cf48f-Paper-Datasets_and_Benchmarks.pdf}, + volume = {35}, + year = {2022} + } +``` ## Usage @@ -52,16 +28,9 @@ To use the benchmark follow this example set of instructions: ```python -# Set DTLZ problem environment variables before loading -import os -os.environ["DEEPHYPER_BENCHMARK_NDIMS"] = "5" # 5 vars -os.environ["DEEPHYPER_BENCHMARK_NOBJS"] = "3" # 2 objs -os.environ["DEEPHYPER_BENCHMARK_DTLZ_PROB"] = "2" # DTLZ2 problem -os.environ["DEEPHYPER_BENCHMARK_DTLZ_OFFSET"] = "0.6" # soln [x_o, .., x_n]=0.6 - -# Load DTLZ benchmark suite +# Load JAHS-bench-201 import deephyper_benchmark as dhb -dhb.load("DTLZ") +dhb.load("JAHSBench") # Example of running one evaluation of DTLZ problem from deephyper.evaluator import RunningJob diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 91f5f04..5114246 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -2,7 +2,8 @@ import numpy as np from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob -from . import model +#from . import model +import model # Create problem @@ -30,7 +31,7 @@ def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: x = np.array([config[k] for k in config if "x" in k]) x = np.asarray_chkfinite(x) # ValueError if any NaN or Inf - f1, f2 = jahs_obj(x) + f1, f2 = jahs_obj(config) return f1, -f2 diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py index 7eaf492..0325a77 100644 --- a/lib/JAHSBench/model.py +++ b/lib/JAHSBench/model.py @@ -3,6 +3,8 @@ """ +import numpy as np + class jahs_bench: """ A callable class implementing the JAHS benchmark problems. 
""" From 183f18467f170915bb4927065d77c9a63f9a6bd2 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 15 May 2023 13:10:11 -0500 Subject: [PATCH 19/36] updated JAHS readme --- lib/JAHSBench/README.md | 77 +++++++++++++++++++++-------------------- lib/JAHSBench/hpo.py | 3 +- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/lib/JAHSBench/README.md b/lib/JAHSBench/README.md index 21b82e0..e2dc551 100644 --- a/lib/JAHSBench/README.md +++ b/lib/JAHSBench/README.md @@ -4,7 +4,26 @@ This module contains a DeepHyper wrapper for [JAHS-Bench-201](https://github.com/automl/jahs_bench_201). ------------------------------------------------------------------------------- +JAHSBench implements a random forest surrogate model, trained on real-world +performance data for neural networks trained on three standard benchmark +problems: + - ``cifar10`` (default), + - ``colorectal_history``, and + - ``fashion_mnist``. + +Using these models as surrogates for the true performance, we can use this +benchmark problem to study the performance of AutoML techniques on joint +architecture-hyperparameter search tasks at minimal expense. + +The models allow us to tune 2 continuous training hyperparameters +(``LearningRate`` and ``WeightDecay``), +2 categorical training hyperparameters +(``Activation`` and ``TrivialAugment``), and +5 categorical architecture parameters +(``Op{i}`` for ``i=0, ..., 4``). + +The benchmark can be run to tune a single objective (``valid-acc``) or +two objectives (``valid-acc`` and ``latency``). 
For further information, see: @@ -32,45 +51,29 @@ To use the benchmark follow this example set of instructions: import deephyper_benchmark as dhb dhb.load("JAHSBench") -# Example of running one evaluation of DTLZ problem +# Example of running one evaluation of JAHSBench from deephyper.evaluator import RunningJob -config = dtlz.hpo.problem.default_configuration # get a default config to test -res = dtlz.hpo.run(RunningJob(parameters=config)) +config = jahsbench.hpo.problem.default_configuration # get a default config to test +res = jahsbench.hpo.run(RunningJob(parameters=config)) ``` +Note that the first time that this benchmark is called in a new directory, +the training data must be downloaded and the random forest model must be built. +This may require a significant amount of time. + +After the initial time required to download and build the models, the +surrogate problem should run relatively quickly. + ## Evaluating Results -Evaluating the performance of a multiobjective solver is nontrivial. -Typically, one should evaluate on two orthogonal bases: - 1. Quality of solutions -- What is the (average) error in the solutions - returned by the solver? - 2. Diversity of solutions -- How much of the true Pareto front is covered - by these solutions? - -To evaluate these two metrics, we use: - 1. RMSE: Let $F_i$ be a point in the solution set returned by a solver, - and let $Y_i$ be the nearest point to $F_i$ on the true Pareto front, - for $i=1,\ldots, n$. - Then the RMSE is $\sqrt{\sum_{i} (F_i - Y_i)^2 / n}$. - 2. Hypervolume dominated: Let $F_i$ be defined as above, and let $R$ be - a pre-determined reference point such that all $F_i$ dominate $R$. - Then the hypervolume is given by the volume of the union of all - hyperboxes $B_i$ whose largest vertex is $F_i$ and smallest vertex - is $R$. The value (and usefulness) of the hypervolume metric is extremely - sensitive to the choice of $R$. 
Therefore, for this problem, we choose - $R$ to be the Nadir point for the true Pareto front. **Note that in order - to use the Nadir point as the reference point, we must throw out every - solution returned by the solver that is worse than the Nadir point. For - extremely difficult problems, this can result in zero hypervolume if no - solutions better than the Nadir point were found. This is most common - for DTLZ1, DTLZ3, and DTLZ7.** - -For a general problem, the two metrics listed above could be very difficult -to compute and many researchers will use the hypervolume with an overly -pessimistic reference point as a proxy for both quality and diversity. -However, in general, the hypervolume tends to promote diversity over quality. -For the DTLZ problems, since the shape of the true Pareto front is known, -we can calculate each of these metrics, and both the ``rmse(results)`` and -``hypervolume(results)`` functions are implemented in the ``dtlz.metrics`` -module. +To evaluate the results, the AutoML team recommends using the validation +error for single-objective runs or the hypervolume metric over both +validation error and evaluation latency for multiobjective-runs. +See their +[Evaluation Protocol](https://automl.github.io/jahs_bench_201/evaluation_protocol) +for more details. + +For multiobjective runs, we recommend a reference point of +``(val_acc = 0, latency=10)``, as discussed in +[this GitHub issue](https://github.com/automl/jahs_bench_201/issues/19). diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 5114246..e54b77c 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -2,8 +2,7 @@ import numpy as np from deephyper.problem import HpProblem from deephyper.evaluator import profile, RunningJob -#from . import model -import model +from . 
import model # Create problem From 8a24ae56cc77e857d872cbccb7af2fc453fc0cdf Mon Sep 17 00:00:00 2001 From: Tyler Date: Fri, 23 Jun 2023 12:56:35 -0500 Subject: [PATCH 20/36] upated dtlz definition --- lib/DTLZ/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/DTLZ/model.py b/lib/DTLZ/model.py index 06e82e7..f588399 100644 --- a/lib/DTLZ/model.py +++ b/lib/DTLZ/model.py @@ -467,9 +467,9 @@ def __call__(self, x): # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= np.cos(np.pi * theta[j] / 2) + fx[i] *= np.cos(theta[j]) if i > 0: - fx[i] *= np.sin(np.pi * theta[self.o - 1 - i] / 2) + fx[i] *= np.sin(theta[self.o - 1 - i]) return fx @@ -533,9 +533,9 @@ def __call__(self, x): # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= np.cos(np.pi * theta[j] / 2) + fx[i] *= np.cos(theta[j]) if i > 0: - fx[i] *= np.sin(np.pi * theta[self.o - 1 - i] / 2) + fx[i] *= np.sin(theta[self.o - 1 - i]) return fx From 195250b35a36b7807dcd8e8a1138b19c9d6bf584 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 26 Jun 2023 22:24:27 -0500 Subject: [PATCH 21/36] fixed additional typos in dtlz5 and 5 --- lib/DTLZ/model.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/DTLZ/model.py b/lib/DTLZ/model.py index f588399..c37e019 100644 --- a/lib/DTLZ/model.py +++ b/lib/DTLZ/model.py @@ -457,19 +457,20 @@ def __call__(self, x): # Initialize kernel function ker = __g2__(self.n, self.o, self.offset) # Calculate theta values - theta = np.zeros(self.o - 1) + theta = np.zeros(self.o) g2x = ker(x) - for i in range(self.o - 1): - theta[i] = np.pi * (1 + 2 * g2x * x[i]) / (4 * (1 + g2x)) + theta[0] = x[0] + for i in range(1, self.o): + theta[i] = (1 + 2 * g2x * x[i]) / (2 * (1 + g2x)) # Initialize output array fx = np.zeros(self.o) fx[:] = (1.0 + g2x) # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - 
fx[i] *= np.cos(theta[j]) + fx[i] *= np.cos(np.pi * theta[j] / 2) if i > 0: - fx[i] *= np.sin(theta[self.o - 1 - i]) + fx[i] *= np.sin(np.pi * theta[self.o - 1 - i] / 2) return fx @@ -523,19 +524,20 @@ def __call__(self, x): # Initialize kernel function ker = __g3__(self.n, self.o, self.offset) # Calculate theta values - theta = np.zeros(self.o - 1) + theta = np.zeros(self.o) g3x = ker(x) - for i in range(self.o - 1): - theta[i] = np.pi * (1 + 2 * g3x * x[i]) / (4 * (1 + g3x)) + theta[0] = x[0] + for i in range(1, self.o): + theta[i] = (1 + 2 * g3x * x[i]) / (2 * (1 + g3x)) # Initialize output array fx = np.zeros(self.o) fx[:] = (1.0 + g3x) # Calculate the output array for i in range(self.o): for j in range(self.o - 1 - i): - fx[i] *= np.cos(theta[j]) + fx[i] *= np.cos(np.pi * theta[j] / 2) if i > 0: - fx[i] *= np.sin(theta[self.o - 1 - i]) + fx[i] *= np.sin(np.pi * theta[self.o - 1 - i] / 2) return fx @@ -594,7 +596,7 @@ def __call__(self, x): # Calculate kernel functions gx = 1.0 + ker(x) hx = (-np.sum(x[:self.o-1] * - (1.0 + np.sin(3.0 * np.pi * x[:self.o-1]) / gx)) + (1.0 + np.sin(3.0 * np.pi * x[:self.o-1])) / gx) + float(self.o)) # Calculate the last entry in the output array fx[self.o-1] = gx * hx From e2bfd84286b747daabc82a800921f052b44f8c83 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 26 Jun 2023 22:31:31 -0500 Subject: [PATCH 22/36] dtlz4 bug --- lib/DTLZ/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/DTLZ/model.py b/lib/DTLZ/model.py index c37e019..b46bc0f 100644 --- a/lib/DTLZ/model.py +++ b/lib/DTLZ/model.py @@ -376,7 +376,7 @@ class dtlz4(__dtlz_base__): """ - def __init__(self, num_des, num_obj=3, offset=0.0, alpha=100.0): + def __init__(self, num_des, num_obj=3, offset=0.5, alpha=100.0): """ Constructor for DTLZ7, with modified default offset. 
Args: From c52157b55110aa3b6e1fc03691b32e79b978e408 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 26 Jun 2023 22:38:55 -0500 Subject: [PATCH 23/36] propagated issue through dtlz7 soln --- lib/DTLZ/metrics.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/DTLZ/metrics.py b/lib/DTLZ/metrics.py index 6bf6e85..923f8aa 100644 --- a/lib/DTLZ/metrics.py +++ b/lib/DTLZ/metrics.py @@ -160,12 +160,11 @@ def _dtlz7Dist(self, pts): # Project each point onto DTLZ7 solution and calculate difference pts_proj = [] for fi in pts: - gx = 1.0 + gx = 2.0 hx = float(self.nobjs) for j in range(self.nobjs-1): - hx = hx - ((fi[j] / (1.0 + gx)) * (1.0 + np.sin(3.0 * np.pi - * fi[j]))) - pts_proj.append((1.0 + gx) * hx) + hx = hx - (fi[j] / (1.0 + np.sin(3.0 * np.pi * fi[j])) / gx) + pts_proj.append(gx * hx) return np.array([np.abs(fi[-1] - fj) for fi, fj in zip(pts, pts_proj)]) From 375ab70ba8f34b01e57de00465fa87d02c25a2a3 Mon Sep 17 00:00:00 2001 From: Tyler Date: Mon, 26 Jun 2023 23:17:36 -0500 Subject: [PATCH 24/36] fixed dtlz7 perf calc --- lib/DTLZ/metrics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/DTLZ/metrics.py b/lib/DTLZ/metrics.py index 923f8aa..29d7d1e 100644 --- a/lib/DTLZ/metrics.py +++ b/lib/DTLZ/metrics.py @@ -161,9 +161,9 @@ def _dtlz7Dist(self, pts): pts_proj = [] for fi in pts: gx = 2.0 - hx = float(self.nobjs) - for j in range(self.nobjs-1): - hx = hx - (fi[j] / (1.0 + np.sin(3.0 * np.pi * fi[j])) / gx) + hx = (-np.sum(fi[:self.nobjs-1] * + (1.0 + np.sin(3.0 * np.pi * fi[:self.nobjs-1])) / gx) + + float(self.nobjs)) pts_proj.append(gx * hx) return np.array([np.abs(fi[-1] - fj) for fi, fj in zip(pts, pts_proj)]) From 15a5e8b7ffb14f80369e5a3525d3bc70ef3dbc2d Mon Sep 17 00:00:00 2001 From: Tyler Date: Wed, 28 Jun 2023 22:41:35 -0500 Subject: [PATCH 25/36] updated JAHS-Bench defn --- lib/JAHSBench/hpo.py | 27 +++++++++++++++++++-------- lib/JAHSBench/model.py | 20 +++++++++----------- 2 files changed, 28
insertions(+), 19 deletions(-) diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index e54b77c..6daf21b 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -5,6 +5,9 @@ from . import model +# Read in whether to do single- or multi-objectives +multiobj = int(os.environ.get("DEEPHYPER_BENCHMARK_MOO", 1)) + # Create problem problem = HpProblem() jahs_obj = model.jahs_bench() @@ -17,22 +20,30 @@ # 6 categorical architecture design variables for i in range(1, 7): problem.add_hyperparameter([0, 1, 2, 3, 4], f"Op{i}") +# 1 integer hyperparameter number of training epochs (1 to 200) +problem.add_hyperparameter((1, 200), "discrete") @profile -def run(job: RunningJob, sleep=False, sleep_mean=60, sleep_noise=20) -> dict: +def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict: config = job.parameters + result = jahs_obj(config) if sleep: - t_sleep = np.random.normal(loc=sleep_mean, scale=sleep_noise) - t_sleep = max(t_sleep, 0) + t_sleep = config["runtime"] * sleep_scale time.sleep(t_sleep) - x = np.array([config[k] for k in config if "x" in k]) - x = np.asarray_chkfinite(x) # ValueError if any NaN or Inf - f1, f2 = jahs_obj(config) - - return f1, -f2 + dh_data = {} + dh_data["metadata"] = result + if multiobj: + dh_data["objective"] = [ + config["valid-acc"], + -config["latency"], + -config['size_MB'] + ] + else: + dh_data["objective"] = config["valid-acc"] + return dh_data if __name__ == "__main__": diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py index 0325a77..7904ae9 100644 --- a/lib/JAHSBench/model.py +++ b/lib/JAHSBench/model.py @@ -8,22 +8,18 @@ class jahs_bench: """ A callable class implementing the JAHS benchmark problems. """ - def __init__(self, nepochs=200, dataset="cifar10"): + def __init__(self, dataset="fashion_mnist"): """ Import and configure the jahs-bench module. Args: - nepochs (int, optional): Number of training epochs to use, - defaults to 200. 
- - dataset (str): One of "cifar10" (default), "colorectal_history", - or "fashion_mnist" + dataset (str): One of "cifar10", "colorectal_history", + or "fashion_mnist" (default) """ from jahs_bench.api import Benchmark ### JAHS bench settings ### - self.nepochs = nepochs MODEL_PATH = "." # Define the benchmark self.benchmark = Benchmark( @@ -60,9 +56,11 @@ def __call__(self, x): config['TrivialAugment'] = True else: config['TrivialAugment'] = False + # Check for nepochs + nepochs = 200 + if 'nepochs' in x.keys(): + nepochs = x['nepochs'] # Evaluate and return fx = np.zeros(2) - result = self.benchmark(config, nepochs=self.nepochs) - fx[0] = result[self.nepochs]['valid-acc'] - fx[1] = result[self.nepochs]['latency'] - return fx[0], fx[1] + result = self.benchmark(config, nepochs=nepochs) + return result[nepochs] From 5a376e3fdc4e2c970cf6cdf66fd1dbc11f9474a4 Mon Sep 17 00:00:00 2001 From: Tyler Date: Wed, 28 Jun 2023 23:03:53 -0500 Subject: [PATCH 26/36] need os module --- lib/JAHSBench/hpo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 6daf21b..36ce166 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -1,7 +1,9 @@ -import time +import os import numpy as np -from deephyper.problem import HpProblem +import time + from deephyper.evaluator import profile, RunningJob +from deephyper.problem import HpProblem from . 
import model From bf3716d0234b0bff7c74ac3d3a21baf2c2ac2302 Mon Sep 17 00:00:00 2001 From: Tyler Chang Date: Fri, 30 Jun 2023 21:32:00 +0000 Subject: [PATCH 27/36] fixed typo in output --- lib/JAHSBench/hpo.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 36ce166..18d6a62 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -30,6 +30,7 @@ def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict: config = job.parameters result = jahs_obj(config) + print(result) if sleep: t_sleep = config["runtime"] * sleep_scale @@ -39,12 +40,12 @@ def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict: dh_data["metadata"] = result if multiobj: dh_data["objective"] = [ - config["valid-acc"], - -config["latency"], - -config['size_MB'] + result["valid-acc"], + -result["latency"], + -result['size_MB'] ] else: - dh_data["objective"] = config["valid-acc"] + dh_data["objective"] = result["valid-acc"] return dh_data From 519eb3fdda7149781422d2a9d6ba783dd7f7fffa Mon Sep 17 00:00:00 2001 From: Tyler Chang Date: Fri, 30 Jun 2023 21:33:15 +0000 Subject: [PATCH 28/36] deleted erroneous print --- lib/JAHSBench/hpo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index 18d6a62..badc087 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -30,7 +30,6 @@ def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict: config = job.parameters result = jahs_obj(config) - print(result) if sleep: t_sleep = config["runtime"] * sleep_scale From 8b4d5cb3b617a679d31114c058e0f26e64b3ba2c Mon Sep 17 00:00:00 2001 From: Tyler Chang Date: Sat, 8 Jul 2023 01:57:39 +0000 Subject: [PATCH 29/36] changes from polaris --- lib/JAHSBench/hpo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/JAHSBench/hpo.py b/lib/JAHSBench/hpo.py index badc087..9a3e2cc 100644 --- a/lib/JAHSBench/hpo.py +++ b/lib/JAHSBench/hpo.py @@ -23,7 +23,7 @@
for i in range(1, 7): problem.add_hyperparameter([0, 1, 2, 3, 4], f"Op{i}") # 1 integer hyperparameter number of training epochs (1 to 200) -problem.add_hyperparameter((1, 200), "discrete") +problem.add_hyperparameter((1, 200), "nepochs") @profile def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict: @@ -32,7 +32,7 @@ def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict: result = jahs_obj(config) if sleep: - t_sleep = config["runtime"] * sleep_scale + t_sleep = result["runtime"] * sleep_scale time.sleep(t_sleep) dh_data = {} From 737bef1294393ce8a5ff6dcbae9655edff2f5c61 Mon Sep 17 00:00:00 2001 From: Tyler Date: Wed, 12 Jul 2023 09:11:08 -0500 Subject: [PATCH 30/36] updated JAHSBench docs and install --- lib/JAHSBench/README.md | 44 +++++++++++----- lib/JAHSBench/benchmark.py | 14 ++++++ lib/JAHSBench/metrics.py | 100 +++++++++++++++++++++++++++++++++++++ lib/JAHSBench/model.py | 4 +- 4 files changed, 147 insertions(+), 15 deletions(-) create mode 100644 lib/JAHSBench/metrics.py diff --git a/lib/JAHSBench/README.md b/lib/JAHSBench/README.md index e2dc551..58f081c 100644 --- a/lib/JAHSBench/README.md +++ b/lib/JAHSBench/README.md @@ -1,4 +1,3 @@ - # JAHS Benchmark Suite This module contains a DeepHyper wrapper for @@ -16,14 +15,26 @@ benchmark problem to study the performance of AutoML techniques on joint architecture-hyperparameter search tasks at minimal expense. The models allow us to tune 2 continuous training hyperparameters -(``LearningRate`` and ``WeightDecay``), + - ``LearningRate`` and + - ``WeightDecay``, + 2 categorical training hyperparameters -(``Activation`` and ``TrivialAugment``), and -5 categorical architecture parameters -(``Op{i}`` for ``i=0, ..., 4``). + - ``Activation`` and + - ``TrivialAugment``, + +and 5 categorical architecture parameters + - ``Op{i}`` for ``i=0, ..., 4``. + +For DeepHyper's implementation, we have added 9th integer-valued parameter, +which is the number of epochs trained + - ``nepochs``. 
+ +When run with the option ``sleep=True``, ``JAHSBench`` will wait for an +amount of time proportional to the ``runtime`` field returned by +JAHS-Bench-201's surrogates. By default, this is 1% of the true runtime. The benchmark can be run to tune a single objective (``valid-acc``) or -two objectives (``valid-acc`` and ``latency``). +three objectives (``valid-acc``, ``latency``, and ``size_MB``). For further information, see: @@ -47,8 +58,12 @@ To use the benchmark follow this example set of instructions: ```python -# Load JAHS-bench-201 import deephyper_benchmark as dhb + +# Install JAHS-bench-201 and fetch data +dhb.install("JAHSBench") + +# Load JAHS-bench-201 dhb.load("JAHSBench") # Example of running one evaluation of JAHSBench @@ -58,12 +73,15 @@ res = jahsbench.hpo.run(RunningJob(parameters=config)) ``` -Note that the first time that this benchmark is called in a new directory, -the training data must be downloaded and the random forest model must be built. -This may require a significant amount of time. +Note that JAHS-Bench-201 uses XGBoost, which may not be compatible with older +versions of MacOS. +Additionally, the surrogate data has been pickled with an older version +of scikit-learn and newer versions will fail to correctly load the surrogate +models. -After the initial time required to download and build the models, the -surrogate problem should run relatively quickly. +For more information, see the following GitHub issues: + - https://github.com/automl/jahs_bench_201/issues/6 + - https://github.com/automl/jahs_bench_201/issues/18 ## Evaluating Results @@ -75,5 +93,5 @@ See their for more details. For multiobjective runs, we recommend a reference point of -``(val_acc = 0, latency=10)``, as discussed in +``(val_acc = 0, latency=10, size_MB=10000)``, as discussed in [this GitHub issue](https://github.com/automl/jahs_bench_201/issues/19).
diff --git a/lib/JAHSBench/benchmark.py b/lib/JAHSBench/benchmark.py index e64b2a9..03b59b2 100644 --- a/lib/JAHSBench/benchmark.py +++ b/lib/JAHSBench/benchmark.py @@ -1,6 +1,20 @@ +import os + from deephyper_benchmark import * +DIR = os.path.dirname(os.path.abspath(__file__)) + class JAHS201Benchmark(Benchmark): version = "0.0.1" + requires = { + "py-pip-requirements": { + "type": "pip", + "name": "-r " + os.path.join(DIR, "REQUIREMENTS.txt"), + }, + "bash-install": { + "type": "cmd", + "cmd": "cd . && bash " + os.path.join(DIR, "./install.sh"), + }, + } diff --git a/lib/JAHSBench/metrics.py b/lib/JAHSBench/metrics.py new file mode 100644 index 0000000..8200d39 --- /dev/null +++ b/lib/JAHSBench/metrics.py @@ -0,0 +1,100 @@ +import os +import numpy as np +from deephyper.skopt.moo import pareto_front, hypervolume + + +class PerformanceEvaluator: + """ A class defining performance evaluators for JAHS Bench 201 problems. + + Contains the following public methods: + + * `__init__()` constructs a new instance by reading the problem defn + from environment variables, + * `hypervolume(pts)` calculates the total hypervolume dominated by + the current solution, using the Nadir point as the reference point + and filtering out solutions that do not dominate the Nadir point, + * `nadirPt()` calculates the Nadir point for the current problem, + * `numPts(pts)` calculates the number of solution points that dominate + the Nadir point, and + + """ + + def __init__(self, p_name="fashion_mnist"): + """ Read the current DTLZ problem defn from environment vars. """ + + self.p_name = p_name + multiobj = int(os.environ.get("DEEPHYPER_BENCHMARK_MOO", 1)) + if multiobj: + self.nobjs = 3 + else: + self.nobjs = 1 + + def hypervolume(self, pts): + """ Calculate the hypervolume dominated by soln, wrt the Nadir point. + + Args: + pts (numpy.ndarray): A 2d array of objective values. + Each row is an objective value in the solution set. 
+ + Returns: + float: The total hypervolume dominated by the current solution, + filtering out points worse than the Nadir point and using the + Nadir point as the reference. + + """ + + if self.nobjs < 2: + raise ValueError("Cannot calculate hypervolume for 1 objective") + if pts.size > 0 and pts[0, 0] > 0: + filtered_pts = -pts.copy() + else: + filtered_pts = pts.copy() + nadir = self.nadirPt() + for i in range(pts.shape[0]): + if np.any(filtered_pts[i, :] > nadir): + filtered_pts[i, :] = nadir + return hypervolume(filtered_pts, nadir) + + def nadirPt(self): + """ Calculate the Nadir point for the given problem definition. """ + + if self.p_name in ["cifar10", "colorectal_history", "fashion_mnist"]: + nadir = np.ones(self.nobjs) + nadir[0] = 0 + if self.nobjs > 1: + nadir[1] = 10.0 + nadir[2] = 100.0 + return nadir + else: + raise ValueError(f"{self.p_name} is not a valid problem") + + def numPts(self, pts): + """ Calculate the number of solutions that dominate the Nadir point. + + Args: + pts (numpy.ndarra): A 2d array of objective values. + Each row is an objective value in the solution set. + + Returns: + int: The number of fi in pts such that all(fi < self.nadirPt). + + """ + + if np.any(pts < 0): + pareto_pts = pareto_front(-pts) + else: + pareto_pts = pareto_front(pts) + return sum([all(fi <= self.nadirPt()) for fi in pareto_pts]) + + +if __name__ == "__main__": + """ Driver code to test performance metrics. 
""" + + result = np.array([[80, -8, -10], [90, -9, -90], [10, -9.1, -99], [99.0, -1.0, -200.0]]) + + evaluator = PerformanceEvaluator() + + assert abs(evaluator.hypervolume(result) - 14500) < 1.0e-8 + assert evaluator.numPts(result) == 2 + assert np.all(np.abs(evaluator.nadirPt() - np.array([0, 10, 100])) + < 1.0e-8) diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py index 7904ae9..3e633cc 100644 --- a/lib/JAHSBench/model.py +++ b/lib/JAHSBench/model.py @@ -20,13 +20,13 @@ def __init__(self, dataset="fashion_mnist"): from jahs_bench.api import Benchmark ### JAHS bench settings ### - MODEL_PATH = "." + MODEL_PATH = os.path.dirname(os.path.abspath(__file__)) # Define the benchmark self.benchmark = Benchmark( task=dataset, save_dir=MODEL_PATH, kind="surrogate", - download=True + download=False ) def __call__(self, x): From 8e9e4162040b97b36ad515185770ef31af03f23a Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 13 Jul 2023 22:25:33 -0500 Subject: [PATCH 31/36] added REQUIREMENT.txt --- lib/JAHSBench/REQUIREMENTS.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 lib/JAHSBench/REQUIREMENTS.txt diff --git a/lib/JAHSBench/REQUIREMENTS.txt b/lib/JAHSBench/REQUIREMENTS.txt new file mode 100644 index 0000000..702f41c --- /dev/null +++ b/lib/JAHSBench/REQUIREMENTS.txt @@ -0,0 +1,2 @@ +jahs-bench +xgboost From e5d642df68ad84f5b16a78c24e0d0090ad695c27 Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 13 Jul 2023 22:25:46 -0500 Subject: [PATCH 32/36] added install script --- lib/JAHSBench/install.sh | 1 + 1 file changed, 1 insertion(+) create mode 100755 lib/JAHSBench/install.sh diff --git a/lib/JAHSBench/install.sh b/lib/JAHSBench/install.sh new file mode 100755 index 0000000..76a97b8 --- /dev/null +++ b/lib/JAHSBench/install.sh @@ -0,0 +1 @@ +python -m jahs_bench.download --target surrogates From 120dc7ecdbbb651f4f8ef2d493236f36d060228b Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 13 Jul 2023 23:14:59 -0500 Subject: [PATCH 33/36] added os to model --- 
lib/JAHSBench/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py index 3e633cc..6196ab2 100644 --- a/lib/JAHSBench/model.py +++ b/lib/JAHSBench/model.py @@ -18,6 +18,7 @@ def __init__(self, dataset="fashion_mnist"): """ from jahs_bench.api import Benchmark + import os ### JAHS bench settings ### MODEL_PATH = os.path.dirname(os.path.abspath(__file__)) From f9536f8e9a965773bfdf9508c09d1e1f996d775b Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 13 Jul 2023 23:19:58 -0500 Subject: [PATCH 34/36] added a random sampler --- lib/JAHSBench/model.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/JAHSBench/model.py b/lib/JAHSBench/model.py index 6196ab2..1a0fa69 100644 --- a/lib/JAHSBench/model.py +++ b/lib/JAHSBench/model.py @@ -30,6 +30,18 @@ def __init__(self, dataset="fashion_mnist"): download=False ) + def __sample__(self): + """ Randomly sample a JAHS-Bench-201 configuration. + + Returns: + dict: A configuration dictionary. + + """ + + config = self.benchmark.sample_config() + config['nepochs'] = np.random.randint(1, 200) + return config + def __call__(self, x): """ DeepHyper compatible objective function calling jahs-bench. From ea4868f0acd7184c69dbac5fedb52e1c49856e84 Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 13 Jul 2023 23:32:57 -0500 Subject: [PATCH 35/36] update READMe with row for JAHSBench --- README.md | 1 + lib/JAHSBench/README.md | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index daafed6..0583de9 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ The `@profile` decorator should be used on all `run`-functions to collect the `t | PINNBench | Physics Informed Neural Networks Benchmark. | $\mathbb{R}\times\mathbb{N}\times\mathbb{C}$ | $\mathbb{R}$ | ❌ | ✅ | ms | | Toy | Toy examples for debugging. | | | | | | | DTLZ | The modified DTLZ multiobjective test suite. 
| $\mathbb{R}$ | $\mathbb{R}$ | ✅ | ❌ | configurable | +| JAHSBench | A slightly modified JAHSBench 201 wrapper. | $\mathbb{R}^2\times{\text categorical}^8\mathbb{Z}$ | $\mathbb{R}$ | ✅ | ❌ | configurable | | | | | | | | | diff --git a/lib/JAHSBench/README.md b/lib/JAHSBench/README.md index 58f081c..2131053 100644 --- a/lib/JAHSBench/README.md +++ b/lib/JAHSBench/README.md @@ -66,10 +66,12 @@ dhb.install("JAHSBench") # Load JAHS-bench-201 dhb.load("JAHSBench") +from deephyper_benchmark.lib.jahsbench import hpo + # Example of running one evaluation of JAHSBench from deephyper.evaluator import RunningJob -config = jahsbench.hpo.problem.default_configuration # get a default config to test -res = jahsbench.hpo.run(RunningJob(parameters=config)) +config = hpo.jahs_obj.__sample__() # sample a random config to test +res = hpo.run(RunningJob(parameters=config)) ``` @@ -93,5 +95,15 @@ See their for more details. For multiobjective runs, we recommend a reference point of -``(val_acc = 0, latency=10, size_MB=10000)``, as discussed in +``(val_acc = 0, latency=10, size_MB=100)``, as discussed in [this GitHub issue](https://github.com/automl/jahs_bench_201/issues/19). + +To evaluate hypervolume with this reference point, use our metrics + +```python + +from deephyper_benchmark.lib.jahsbench import metrics +evaluator = metrics.PerformanceEvaluator() +hv = evaluator.hypervolume(res) + +``` From ba47d3151764b4d3635e90de145f3eceddfe1c1f Mon Sep 17 00:00:00 2001 From: Tyler H Chang Date: Thu, 13 Jul 2023 23:34:51 -0500 Subject: [PATCH 36/36] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0583de9..77b4d20 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ The `@profile` decorator should be used on all `run`-functions to collect the `t | PINNBench | Physics Informed Neural Networks Benchmark.
| $\mathbb{R}\times\mathbb{N}\times\mathbb{C}$ | $\mathbb{R}$ | ❌ | ✅ | ms | | Toy | Toy examples for debugging. | | | | | | | DTLZ | The modified DTLZ multiobjective test suite. | $\mathbb{R}$ | $\mathbb{R}$ | ✅ | ❌ | configurable | -| JAHSBench | A slightly modified JAHSBench 201 wrapper. | $\mathbb{R}^2\times{\text categorical}^8\mathbb{Z}$ | $\mathbb{R}$ | ✅ | ❌ | configurable | +| JAHSBench | A slightly modified JAHSBench 201 wrapper. | $\mathbb{R}^2\times\text{categorical}^8\times\mathbb{Z}$ | $\mathbb{R}$ | ✅ | ❌ | configurable | | | | | | | | |