JAHSBench #19
@@ -1,3 +1,109 @@

# JAHS Benchmark Suite

This module contains a DeepHyper wrapper for
[JAHS-Bench-201](https://github.com/automl/jahs_bench_201).

JAHSBench implements a random forest surrogate model, trained on real-world
performance data for neural networks trained on three standard benchmark
problems:
- ``cifar10`` (default),
- ``colorectal_histology``, and
- ``fashion_mnist``.

> **Review comment:** JAHS is using XGBoost I think: https://openreview.net/pdf?id=_HLcjaVlqJ

Using these models as surrogates for the true performance, we can use this
benchmark problem to study the performance of AutoML techniques on joint
architecture-hyperparameter search tasks at minimal expense.

The models allow us to tune two continuous training hyperparameters
- ``LearningRate`` and
- ``WeightDecay``,

two categorical training hyperparameters
- ``Activation`` and
- ``TrivialAugment``,

and six categorical architecture parameters
- ``Op{i}`` for ``i = 1, ..., 6``.

For DeepHyper's implementation, we have added an additional integer-valued
parameter, the number of training epochs
- ``nepochs``.
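For illustration, a complete input to the benchmark can be represented as a plain dictionary keyed by the parameter names above (with ``Op1``..``Op6`` following the six architecture operations defined in the ``hpo.py`` diff below). The specific values here, and the name ``sample_config``, are hypothetical, not recommended settings:

```python
# A hypothetical configuration for the search space described above.
# Keys follow the parameter names listed in this README; the values
# themselves are illustrative, not recommended settings.
sample_config = {
    "LearningRate": 0.1,     # continuous, in (1e-3, 1.0)
    "WeightDecay": 1e-4,     # continuous, in (1e-5, 1e-3)
    "Activation": "ReLU",    # categorical: ReLU, Hardswish, or Mish
    "TrivialAugment": "on",  # categorical: on or off
    "nepochs": 100,          # integer number of training epochs
}
for i in range(1, 7):
    # one categorical choice per architecture cell operation
    sample_config[f"Op{i}"] = 0

print(len(sample_config))  # 11 parameters in total
```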

When run with the option ``sleep=True``, ``JAHSBench`` will wait for an
amount of time proportional to the ``runtime`` field returned by
JAHS-Bench-201's surrogates. By default, this is 1% of the true runtime.
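This throttling behavior can be sketched in isolation. The following is a minimal stand-in, not the benchmark's code; ``fake_surrogate`` and ``timed_run`` are hypothetical names, and only the ``runtime``-scaled sleep mirrors the described behavior:

```python
import time

def fake_surrogate(config):
    # Hypothetical stand-in for the JAHS-Bench-201 surrogate: it returns
    # a predicted validation accuracy and a predicted runtime in seconds.
    return {"valid-acc": 90.0, "runtime": 50.0}

def timed_run(config, sleep=False, sleep_scale=0.01):
    # Mirror the benchmark's throttling: optionally sleep for a fraction
    # (by default 1%) of the surrogate-predicted runtime.
    result = fake_surrogate(config)
    if sleep:
        time.sleep(result["runtime"] * sleep_scale)
    return result

start = time.perf_counter()
timed_run({}, sleep=True)  # sleeps for about 0.5 seconds (1% of 50 s)
elapsed = time.perf_counter() - start
```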

The benchmark can be run to tune a single objective (``valid-acc``) or
three objectives (``valid-acc``, ``latency``, and ``size_MB``).
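The two modes return differently shaped objectives. A hedged sketch of the conversion, following the sign convention in the ``run()`` function later in this PR (``to_objective`` and the sample values are illustrative; DeepHyper maximizes every objective, so minimized quantities are negated):

```python
def to_objective(result, multiobj=True):
    # DeepHyper maximizes every objective, so quantities that should be
    # minimized (latency, model size) are negated in the multiobjective case.
    if multiobj:
        return [result["valid-acc"], -result["latency"], -result["size_MB"]]
    return result["valid-acc"]

# Illustrative surrogate output (values made up):
surrogate_result = {"valid-acc": 92.3, "latency": 1.7, "size_MB": 4.2}
print(to_objective(surrogate_result))                  # [92.3, -1.7, -4.2]
print(to_objective(surrogate_result, multiobj=False))  # 92.3
```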

For further information, see:

```
@inproceedings{NEURIPS2022_fd78f2f6,
  author = {Bansal, Archit and Stoll, Danny and Janowski, Maciej and Zela, Arber and Hutter, Frank},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
  pages = {38788--38802},
  publisher = {Curran Associates, Inc.},
  title = {JAHS-Bench-201: A Foundation For Research On Joint Architecture And Hyperparameter Search},
  url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/fd78f2f65881c1c7ce47e26b040cf48f-Paper-Datasets_and_Benchmarks.pdf},
  volume = {35},
  year = {2022}
}
```

## Usage

To use the benchmark, follow this example set of instructions:

```python
import deephyper_benchmark as dhb

# Install JAHS-bench-201 and fetch data
dhb.install("JAHSBench")

# Load JAHS-bench-201
dhb.load("JAHSBench")

from deephyper_benchmark.lib.jahsbench import hpo

# Example of running one evaluation of JAHSBench
from deephyper.evaluator import RunningJob

config = hpo.problem.jahs_obj.__sample__()  # get a default config to test
res = hpo.run(RunningJob(parameters=config))
```

Note that JAHS-Bench-201 uses XGBoost, which may not be compatible with older
versions of macOS.
Additionally, the surrogate data has been pickled with an older version
of scikit-learn, and newer versions will fail to correctly load the surrogate
models.

For more information, see the following GitHub issues:
- https://github.com/automl/jahs_bench_201/issues/6
- https://github.com/automl/jahs_bench_201/issues/18

## Evaluating Results

To evaluate the results, the AutoML team recommends using the validation
error for single-objective runs, or the hypervolume metric over both
validation error and evaluation latency for multiobjective runs.
See their
[Evaluation Protocol](https://automl.github.io/jahs_bench_201/evaluation_protocol)
for more details.

For multiobjective runs, we recommend a reference point of
``(val_acc = 0, latency = 10, size_MB = 100)``, as discussed in
[this GitHub issue](https://github.com/automl/jahs_bench_201/issues/19).

To evaluate hypervolume with this reference point, use our metrics:

```python
from deephyper_benchmark.lib.jahsbench import metrics

evaluator = metrics.PerformanceEvaluator()
hv = evaluator.hypervolume(res)
```
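As a quick sanity check on the reference point above, the hypervolume contributed by a single solution is just the volume of the axis-aligned box between it and the reference point. A self-contained sketch (not the benchmark's implementation; ``single_point_hv`` and the sample values are illustrative):

```python
def single_point_hv(point, ref=(0.0, 10.0, 100.0)):
    # Hypervolume dominated by a single (val_acc, latency, size_MB) point
    # relative to the reference point (val_acc=0, latency=10, size_MB=100),
    # where val_acc is maximized and latency and size_MB are minimized.
    val_acc, latency, size_mb = point
    widths = (val_acc - ref[0], ref[1] - latency, ref[2] - size_mb)
    if any(w <= 0 for w in widths):
        return 0.0  # the point does not dominate the reference point
    return widths[0] * widths[1] * widths[2]

print(single_point_hv((90.0, 1.0, 10.0)))   # 90 * 9 * 90 = 72900.0
print(single_point_hv((50.0, 12.0, 10.0)))  # latency worse than reference: 0.0
```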
@@ -0,0 +1,2 @@

```
jahs-bench
xgboost
```
@@ -0,0 +1 @@

```python
__version__ = "0.0.1"
```
@@ -0,0 +1,20 @@

```python
import os

from deephyper_benchmark import *

DIR = os.path.dirname(os.path.abspath(__file__))


class JAHS201Benchmark(Benchmark):

    version = "0.0.1"
    requires = {
        "py-pip-requirements": {
            "type": "pip",
            "name": "-r " + os.path.join(DIR, "REQUIREMENTS.txt"),
        },
        "bash-install": {
            "type": "cmd",
            "cmd": "cd . && bash " + os.path.join(DIR, "./install.sh"),
        },
    }
```

> **Review comment** (on the ``REQUIREMENTS.txt`` line): using the more standard […]
@@ -0,0 +1,56 @@

```python
import os
import time

from deephyper.evaluator import profile, RunningJob
from deephyper.problem import HpProblem

from . import model

# Read in whether to do single- or multi-objective optimization
multiobj = int(os.environ.get("DEEPHYPER_BENCHMARK_MOO", 1))

# Create problem
problem = HpProblem()
jahs_obj = model.jahs_bench()
# 2 continuous hyperparameters
problem.add_hyperparameter((1.0e-3, 1.0), "LearningRate")
problem.add_hyperparameter((1.0e-5, 1.0e-3), "WeightDecay")
# 2 categorical hyperparameters
problem.add_hyperparameter(["ReLU", "Hardswish", "Mish"], "Activation")
problem.add_hyperparameter(["on", "off"], "TrivialAugment")
# 6 categorical architecture design variables
for i in range(1, 7):
    problem.add_hyperparameter([0, 1, 2, 3, 4], f"Op{i}")
# 1 integer hyperparameter: the number of training epochs (1 to 200)
problem.add_hyperparameter((1, 200), "nepochs")


@profile
def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict:
    config = job.parameters
    result = jahs_obj(config)

    if sleep:
        t_sleep = result["runtime"] * sleep_scale
        time.sleep(t_sleep)

    dh_data = {}
    dh_data["metadata"] = result
    if multiobj:
        dh_data["objective"] = [
            result["valid-acc"],
            -result["latency"],
            -result["size_MB"],
        ]
    else:
        dh_data["objective"] = result["valid-acc"]
    return dh_data


if __name__ == "__main__":
    print(problem)
    default_config = problem.default_configuration
    print(f"{default_config=}")
    result = run(RunningJob(parameters=default_config))
    print(f"{result=}")
```
@@ -0,0 +1 @@

```shell
python -m jahs_bench.download --target surrogates
```
@@ -0,0 +1,100 @@

```python
import os

import numpy as np

from deephyper.skopt.moo import pareto_front, hypervolume


class PerformanceEvaluator:
    """A class defining performance evaluators for JAHS-Bench-201 problems.

    Contains the following public methods:

    * `__init__()` constructs a new instance by reading the problem
      definition from environment variables,
    * `hypervolume(pts)` calculates the total hypervolume dominated by
      the current solution, using the Nadir point as the reference point
      and filtering out solutions that do not dominate the Nadir point,
    * `nadirPt()` calculates the Nadir point for the current problem, and
    * `numPts(pts)` calculates the number of solution points that dominate
      the Nadir point.

    """

    def __init__(self, p_name="fashion_mnist"):
        """Read the current problem definition from environment variables."""
        self.p_name = p_name
        multiobj = int(os.environ.get("DEEPHYPER_BENCHMARK_MOO", 1))
        if multiobj:
            self.nobjs = 3
        else:
            self.nobjs = 1

    def hypervolume(self, pts):
        """Calculate the hypervolume dominated by soln, wrt the Nadir point.

        Args:
            pts (numpy.ndarray): A 2d array of objective values.
                Each row is an objective value in the solution set.

        Returns:
            float: The total hypervolume dominated by the current solution,
            filtering out points worse than the Nadir point and using the
            Nadir point as the reference.

        """
        if self.nobjs < 2:
            raise ValueError("Cannot calculate hypervolume for 1 objective")
        # Flip the sign convention so that all objectives are minimized
        if pts.size > 0 and pts[0, 0] > 0:
            filtered_pts = -pts.copy()
        else:
            filtered_pts = pts.copy()
        # Clip points that do not dominate the Nadir point
        nadir = self.nadirPt()
        for i in range(pts.shape[0]):
            if np.any(filtered_pts[i, :] > nadir):
                filtered_pts[i, :] = nadir
        return hypervolume(filtered_pts, nadir)

    def nadirPt(self):
        """Calculate the Nadir point for the given problem definition."""
        if self.p_name in ["cifar10", "colorectal_histology", "fashion_mnist"]:
            nadir = np.ones(self.nobjs)
            nadir[0] = 0.0
            if self.nobjs > 1:
                nadir[1] = 10.0
                nadir[2] = 100.0
            return nadir
        else:
            raise ValueError(f"{self.p_name} is not a valid problem")

    def numPts(self, pts):
        """Calculate the number of solutions that dominate the Nadir point.

        Args:
            pts (numpy.ndarray): A 2d array of objective values.
                Each row is an objective value in the solution set.

        Returns:
            int: The number of fi in pts such that all(fi < self.nadirPt).

        """
        # Restore the minimization sign convention before filtering
        if np.any(pts < 0):
            pareto_pts = pareto_front(-pts)
        else:
            pareto_pts = pareto_front(pts)
        return sum([all(fi <= self.nadirPt()) for fi in pareto_pts])


if __name__ == "__main__":
    # Driver code to test performance metrics
    result = np.array([
        [80, -8, -10],
        [90, -9, -90],
        [10, -9.1, -99],
        [99.0, -1.0, -200.0],
    ])

    evaluator = PerformanceEvaluator()

    assert abs(evaluator.hypervolume(result) - 14500) < 1.0e-8
    assert evaluator.numPts(result) == 2
    assert np.all(np.abs(evaluator.nadirPt() - np.array([0, 10, 100]))
                  < 1.0e-8)
```
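The value ``14500`` asserted in the driver above can be reproduced independently by inclusion-exclusion over the boxes dominated by each point, under the same sign-flip and clipping conventions as the evaluator (a self-contained check; the fourth test point is clipped to the Nadir point and contributes no volume):

```python
from itertools import combinations

# Nadir/reference point in the minimization convention used by the evaluator.
nadir = (0.0, 10.0, 100.0)

# The driver's test points after sign flipping; the fourth point fails the
# size_MB bound, is clipped to the Nadir point, and contributes no volume.
pts = [(-80.0, 8.0, 10.0), (-90.0, 9.0, 90.0), (-10.0, 9.1, 99.0)]

def box_volume(corners):
    # Volume of the intersection of the boxes [p, nadir] for p in corners.
    lower = [max(c[d] for c in corners) for d in range(3)]
    widths = [max(nadir[d] - lower[d], 0.0) for d in range(3)]
    return widths[0] * widths[1] * widths[2]

# Inclusion-exclusion over all non-empty subsets of the points.
hv = 0.0
for k in range(1, len(pts) + 1):
    for subset in combinations(pts, k):
        hv += (-1) ** (k + 1) * box_volume(subset)

print(round(hv, 6))  # approximately 14500.0
```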
> **Review comment:** For each benchmark documentation we need to include at least the following sections