JAHSBench #19

Merged
merged 41 commits on Jul 17, 2023
Commits
8758423
added some files copied from parmoo.sim_funcs and Ackley C-BBO
thchang Mar 19, 2023
bb6e219
modified dtlz from parmoo interface to match deephyper benchmark inte…
thchang Mar 19, 2023
56411bc
added extra environment vars to dtlz to read problem def
thchang Mar 19, 2023
da77c27
added dtlz run to examples
thchang Mar 19, 2023
9689394
modified bench_dtlz.py to run an optimization round
thchang Mar 20, 2023
231e3fe
added DTLZ problems to README
thchang Mar 20, 2023
7dcfe0d
updated dir structure to match template, and added README
thchang Mar 20, 2023
5de6e89
@Deathn0t comments implemented
thchang Mar 22, 2023
4a33011
Merge branch 'deephyper:main' into main
thchang Apr 4, 2023
1e24dcb
added a module to DTLZ for computing performance metrics
thchang Apr 5, 2023
325b70e
updated dtlz probs for minimization
thchang Apr 18, 2023
c49948b
Merge branch 'deephyper:main' into main
thchang Apr 27, 2023
4ca4e0e
added details on metrics to README
thchang Apr 27, 2023
7de3156
updated README
thchang Apr 27, 2023
84ef281
updated README
thchang Apr 27, 2023
ddb2aa1
updated README
thchang Apr 27, 2023
690d438
starting on deephyper jahs bench
thchang May 5, 2023
7284970
Merge branch 'deephyper:main' into main
thchang May 5, 2023
01875c6
fixing issues in jahs bench
thchang May 9, 2023
1db221b
updated jahs hpo defn
thchang May 9, 2023
4a32c5f
fixed minor bugs, updated README
May 15, 2023
183f184
updated JAHS readme
thchang May 15, 2023
79720fd
Merge branch 'deephyper:main' into main
thchang Jun 23, 2023
8a24ae5
updated dtlz definition
thchang Jun 23, 2023
195250b
fixed additional typos in dtlz5 and 5
thchang Jun 27, 2023
e2bfd84
dtlz4 bug
thchang Jun 27, 2023
c52157b
propagated issue through dtlz7 soln
thchang Jun 27, 2023
375ab70
fixed dtlz7 perf calc
thchang Jun 27, 2023
15a5e8b
updated JAHS-Bench defn
thchang Jun 29, 2023
5a376e3
need os module
thchang Jun 29, 2023
bf3716d
fixed typo in output
Jun 30, 2023
519eb3f
deleted erroneous print
Jun 30, 2023
8b4d5cb
changes from polaris
Jul 8, 2023
737bef1
updated JAHSBench docs and install
thchang Jul 12, 2023
8e9e416
added REQUIREMENTS.txt
thchang Jul 14, 2023
e5d642d
added install script
thchang Jul 14, 2023
120dc7e
added os to model
thchang Jul 14, 2023
f9536f8
added a random sampler
thchang Jul 14, 2023
ea4868f
update README with row for JAHSBench
thchang Jul 14, 2023
ba47d31
Update README.md
thchang Jul 14, 2023
2c6c6a4
Merge branch 'deephyper:main' into main
thchang Jul 14, 2023
1 change: 1 addition & 0 deletions README.md
@@ -127,6 +127,7 @@ The `@profile` decorator should be used on all `run`-functions to collect the `t
| PINNBench | Physics Informed Neural Networks Benchmark. | $\mathbb{R}\times\mathbb{N}\times\mathbb{C}$ | $\mathbb{R}$ | ✅ | ✅ | ms |
| Toy | Toy examples for debugging. | | | | | |
| DTLZ | The modified DTLZ multiobjective test suite. | $\mathbb{R}$ | $\mathbb{R}$ | ✅ | ❌ | configurable |
| JAHSBench | A slightly modified JAHS-Bench-201 wrapper. | $\mathbb{R}^2\times\text{categorical}^8\times\mathbb{Z}$ | $\mathbb{R}$ | ✅ | ❌ | configurable |
| | | | | | | |


11 changes: 5 additions & 6 deletions lib/DTLZ/metrics.py
@@ -160,12 +160,11 @@ def _dtlz7Dist(self, pts):
        # Project each point onto DTLZ7 solution and calculate difference
        pts_proj = []
        for fi in pts:
-           gx = 1.0
-           hx = float(self.nobjs)
-           for j in range(self.nobjs-1):
-               hx = hx - ((fi[j] / (1.0 + gx)) * (1.0 + np.sin(3.0 * np.pi
-                                                               * fi[j])))
-           pts_proj.append((1.0 + gx) * hx)
+           gx = 2.0
+           hx = (-np.sum(fi[:self.nobjs-1] *
+                         (1.0 + np.sin(3.0 * np.pi * fi[:self.nobjs-1])) / gx)
+                 + float(self.nobjs))
+           pts_proj.append(gx * hx)
        return np.array([np.abs(fi[-1] - fj) for fi, fj in zip(pts, pts_proj)])
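A hedged note on the new constant (not part of the diff): in the standard DTLZ7 formulation the last objective satisfies

$$f_M = (1 + g)\,h, \qquad h = M - \sum_{i=1}^{M-1} \frac{f_i}{1 + g}\bigl(1 + \sin(3\pi f_i)\bigr),$$

and on the Pareto front the kernel attains its minimum $g = 1$, so $1 + g = 2$. Assuming the modified kernel used here keeps that minimum, the rewritten lines store this factor directly as ``gx = 2.0`` (the deleted loop stored $g = 1$ and formed ``1.0 + gx``) and vectorize the sum over the first $M - 1$ objectives; the projected value is unchanged.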


18 changes: 10 additions & 8 deletions lib/DTLZ/model.py
@@ -376,7 +376,7 @@ class dtlz4(__dtlz_base__):

"""

def __init__(self, num_des, num_obj=3, offset=0.0, alpha=100.0):
def __init__(self, num_des, num_obj=3, offset=0.5, alpha=100.0):
""" Constructor for DTLZ7, with modified default offset.

Args:
@@ -457,10 +457,11 @@ def __call__(self, x):
        # Initialize kernel function
        ker = __g2__(self.n, self.o, self.offset)
        # Calculate theta values
-       theta = np.zeros(self.o - 1)
+       theta = np.zeros(self.o)
        g2x = ker(x)
-       for i in range(self.o - 1):
-           theta[i] = np.pi * (1 + 2 * g2x * x[i]) / (4 * (1 + g2x))
+       theta[0] = x[0]
+       for i in range(1, self.o):
+           theta[i] = (1 + 2 * g2x * x[i]) / (2 * (1 + g2x))
        # Initialize output array
        fx = np.zeros(self.o)
        fx[:] = (1.0 + g2x)
@@ -523,10 +524,11 @@ def __call__(self, x):
        # Initialize kernel function
        ker = __g3__(self.n, self.o, self.offset)
        # Calculate theta values
-       theta = np.zeros(self.o - 1)
+       theta = np.zeros(self.o)
        g3x = ker(x)
-       for i in range(self.o - 1):
-           theta[i] = np.pi * (1 + 2 * g3x * x[i]) / (4 * (1 + g3x))
+       theta[0] = x[0]
+       for i in range(1, self.o):
+           theta[i] = (1 + 2 * g3x * x[i]) / (2 * (1 + g3x))
        # Initialize output array
        fx = np.zeros(self.o)
        fx[:] = (1.0 + g3x)
@@ -594,7 +596,7 @@ def __call__(self, x):
        # Calculate kernel functions
        gx = 1.0 + ker(x)
        hx = (-np.sum(x[:self.o-1] *
-                     (1.0 + np.sin(3.0 * np.pi * x[:self.o-1]) / gx))
+                     (1.0 + np.sin(3.0 * np.pi * x[:self.o-1])) / gx)
              + float(self.o))
        # Calculate the last entry in the output array
        fx[self.o-1] = gx * hx
110 changes: 108 additions & 2 deletions lib/JAHSBench/README.md
Member comment:
For each benchmark documentation we need to include at least the following sections

## Installation
...
## Configuration
...
## Metadata
...

@@ -1,3 +1,109 @@
# JAHS-Bench-201
# JAHS Benchmark Suite

* [JAHS-Bench-201 - Github](https://github.com/automl/jahs_bench_201)
This module contains a DeepHyper wrapper for
[JAHS-Bench-201](https://github.com/automl/jahs_bench_201).

JAHSBench implements an XGBoost surrogate model, trained on real-world
performance data for neural networks trained on three standard benchmark
problems:
- ``cifar10`` (default),
- ``colorectal_histology``, and
- ``fashion_mnist``.

Member comment:

JAHS is using XGBoost I think: https://openreview.net/pdf?id=_HLcjaVlqJ
Section 2.3, page 4

Using these models as surrogates for the true performance, we can use this
benchmark problem to study the performance of AutoML techniques on joint
architecture-hyperparameter search tasks at minimal expense.

The models allow us to tune 2 continuous training hyperparameters
- ``LearningRate`` and
- ``WeightDecay``,

2 categorical training hyperparameters
- ``Activation`` and
- ``TrivialAugment``,

and 6 categorical architecture parameters
- ``Op{i}`` for ``i=1, ..., 6``.

For DeepHyper's implementation, we have added an additional integer-valued
parameter, the number of training epochs
- ``nepochs``.
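For illustration, a complete configuration might look like the following sketch (the values are arbitrary placeholders, not recommendations; the names and ranges follow ``hpo.py`` below):

```python
config = {
    "LearningRate": 0.1,      # continuous, in (1e-3, 1.0)
    "WeightDecay": 5e-4,      # continuous, in (1e-5, 1e-3)
    "Activation": "ReLU",     # one of ReLU, Hardswish, Mish
    "TrivialAugment": "on",   # "on" or "off"
    "Op1": 0, "Op2": 1, "Op3": 2, "Op4": 3, "Op5": 4, "Op6": 0,  # each in {0, ..., 4}
    "nepochs": 200,           # integer, 1 to 200
}
```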

When run with the option ``sleep=True``, the ``run``-function will sleep for
an amount of time proportional to the ``runtime`` field returned by
JAHS-Bench-201's surrogates. By default (``sleep_scale=0.01``), this is 1% of
the true runtime.
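For example, a single throttled evaluation could be sketched as follows (this assumes the install and load steps from the Usage section below have already been performed):

```python
from deephyper.evaluator import RunningJob
from deephyper_benchmark.lib.jahsbench import hpo

config = hpo.problem.default_configuration
# Sleep for roughly 1% of the surrogate-predicted training time.
out = hpo.run(RunningJob(parameters=config), sleep=True, sleep_scale=0.01)
print(out["objective"])
```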

The benchmark can be run to tune a single objective (``valid-acc``) or
three objectives (``valid-acc``, ``latency``, and ``size_MB``).
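Based on ``hpo.py`` below, the mode appears to be selected by the ``DEEPHYPER_BENCHMARK_MOO`` environment variable, which is read when the module is imported; a minimal sketch:

```python
import os

# 0 selects the single-objective (valid-acc) mode; unset or 1 keeps all three objectives.
os.environ["DEEPHYPER_BENCHMARK_MOO"] = "0"

from deephyper_benchmark.lib.jahsbench import hpo  # reads the variable at import time
```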

For further information, see:

```
@inproceedings{NEURIPS2022_fd78f2f6,
author = {Bansal, Archit and Stoll, Danny and Janowski, Maciej and Zela, Arber and Hutter, Frank},
booktitle = {Advances in Neural Information Processing Systems},
editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
pages = {38788--38802},
publisher = {Curran Associates, Inc.},
title = {JAHS-Bench-201: A Foundation For Research On Joint Architecture And Hyperparameter Search},
url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/fd78f2f65881c1c7ce47e26b040cf48f-Paper-Datasets_and_Benchmarks.pdf},
volume = {35},
year = {2022}
}
```

## Usage

To use the benchmark, follow this example:

```python

import deephyper_benchmark as dhb

# Install JAHS-bench-201 and fetch data
dhb.install("JAHSBench")

# Load JAHS-bench-201
dhb.load("JAHSBench")

from deephyper_benchmark.lib.jahsbench import hpo

# Example of running one evaluation of JAHSBench
from deephyper.evaluator import RunningJob
config = hpo.jahs_obj.__sample__() # get a default config to test
res = hpo.run(RunningJob(parameters=config))

```
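As a follow-up, a complete search could be wired up roughly as follows. This is only a sketch, assuming a recent DeepHyper release in which ``Evaluator.create`` and the ``CBO`` search are available as shown:

```python
from deephyper.evaluator import Evaluator
from deephyper.search.hps import CBO

from deephyper_benchmark.lib.jahsbench import hpo

# Serial evaluator that calls the benchmark's run-function directly.
evaluator = Evaluator.create(hpo.run, method="serial")

# Bayesian optimization over the JAHS-Bench-201 search space.
search = CBO(hpo.problem, evaluator, random_state=42)
results = search.search(max_evals=20)  # pandas DataFrame of evaluated configs
print(results)
```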

Note that JAHS-Bench-201 uses XGBoost, which may not be compatible with older
versions of MacOS.
Additionally, the surrogate data has been pickled with an older version
of scikit-learn and newer versions will fail to correctly load the surrogate
models.

For more information, see the following GitHub issues:
- https://github.com/automl/jahs_bench_201/issues/6
- https://github.com/automl/jahs_bench_201/issues/18

## Evaluating Results

To evaluate the results, the AutoML team recommends using the validation
error for single-objective runs or the hypervolume metric over both
validation error and evaluation latency for multiobjective runs.
See their
[Evaluation Protocol](https://automl.github.io/jahs_bench_201/evaluation_protocol)
for more details.

For multiobjective runs, we recommend a reference point of
``(val_acc = 0, latency=10, size_MB=100)``, as discussed in
[this GitHub issue](https://github.com/automl/jahs_bench_201/issues/19).

To evaluate the hypervolume with this reference point, use our metrics module:

```python

from deephyper_benchmark.lib.jahsbench import metrics
evaluator = metrics.PerformanceEvaluator()
hv = evaluator.hypervolume(res)

```
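To make the expected shapes concrete, here is a small sketch (assuming the default multiobjective mode and the sampler from the Usage section above) that stacks several run results into the 2D array ``hypervolume`` expects:

```python
import numpy as np

from deephyper.evaluator import RunningJob
from deephyper_benchmark.lib.jahsbench import hpo, metrics

objectives = []
for _ in range(5):
    config = hpo.jahs_obj.__sample__()            # random configuration
    out = hpo.run(RunningJob(parameters=config))
    objectives.append(out["objective"])           # [valid-acc, -latency, -size_MB]

evaluator = metrics.PerformanceEvaluator()
pts = np.asarray(objectives)
print(evaluator.hypervolume(pts), evaluator.numPts(pts))
```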
2 changes: 2 additions & 0 deletions lib/JAHSBench/REQUIREMENTS.txt
@@ -0,0 +1,2 @@
jahs-bench
xgboost
1 change: 1 addition & 0 deletions lib/JAHSBench/__init__.py
@@ -0,0 +1 @@
__version__ = "0.0.1"
20 changes: 20 additions & 0 deletions lib/JAHSBench/benchmark.py
@@ -0,0 +1,20 @@
import os

from deephyper_benchmark import *

DIR = os.path.dirname(os.path.abspath(__file__))


class JAHS201Benchmark(Benchmark):

version = "0.0.1"
requires = {
"py-pip-requirements": {
"type": "pip",
"name": "-r " + os.path.join(DIR, "REQUIREMENTS.txt"),
Member comment:
using the more standard requirements.txt (lower case format) would be better.

        },
        "bash-install": {
            "type": "cmd",
            "cmd": "cd . && bash " + os.path.join(DIR, "./install.sh"),
        },
    }
56 changes: 56 additions & 0 deletions lib/JAHSBench/hpo.py
@@ -0,0 +1,56 @@
import os
import numpy as np
import time

from deephyper.evaluator import profile, RunningJob
from deephyper.problem import HpProblem
from . import model


# Read in whether to do single- or multi-objective optimization
multiobj = int(os.environ.get("DEEPHYPER_BENCHMARK_MOO", 1))

# Create problem
problem = HpProblem()
jahs_obj = model.jahs_bench()
# 2 continuous hyperparameters
problem.add_hyperparameter((1.0e-3, 1.0), "LearningRate")
problem.add_hyperparameter((1.0e-5, 1.0e-3), "WeightDecay")
# 2 categorical hyperparameters
problem.add_hyperparameter(["ReLU", "Hardswish", "Mish"], "Activation")
problem.add_hyperparameter(["on", "off"], "TrivialAugment")
# 6 categorical architecture design variables
for i in range(1, 7):
    problem.add_hyperparameter([0, 1, 2, 3, 4], f"Op{i}")
# 1 integer hyperparameter: the number of training epochs (1 to 200)
problem.add_hyperparameter((1, 200), "nepochs")

@profile
def run(job: RunningJob, sleep=False, sleep_scale=0.01) -> dict:

    config = job.parameters
    result = jahs_obj(config)

    if sleep:
        t_sleep = result["runtime"] * sleep_scale
        time.sleep(t_sleep)

    dh_data = {}
    dh_data["metadata"] = result
    if multiobj:
        dh_data["objective"] = [
            result["valid-acc"],
            -result["latency"],
            -result['size_MB']
        ]
    else:
        dh_data["objective"] = result["valid-acc"]
    return dh_data


if __name__ == "__main__":
    print(problem)
    default_config = problem.default_configuration
    print(f"{default_config=}")
    result = run(RunningJob(parameters=default_config))
    print(f"{result=}")
1 change: 1 addition & 0 deletions lib/JAHSBench/install.sh
@@ -0,0 +1 @@
python -m jahs_bench.download --target surrogates
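After installation, one quick sanity check is to load and query a surrogate once. This is a hedged sketch based on the upstream ``jahs_bench`` README; the exact ``Benchmark`` signature and ``sample_config`` helper are assumptions about that package, not part of this wrapper:

```python
from jahs_bench import Benchmark

# Load the downloaded CIFAR-10 surrogate and evaluate one random configuration.
benchmark = Benchmark(task="cifar10", kind="surrogate", download=False)
config = benchmark.sample_config()
print(benchmark(config, nepochs=200))
```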
100 changes: 100 additions & 0 deletions lib/JAHSBench/metrics.py
@@ -0,0 +1,100 @@
import os
import numpy as np
from deephyper.skopt.moo import pareto_front, hypervolume


class PerformanceEvaluator:
""" A class defining performance evaluators for JAHS Bench 201 problems.

Contains the following public methods:

* `__init__()` constructs a new instance by reading the problem defn
from environment variables,
* `hypervolume(pts)` calculates the total hypervolume dominated by
the current solution, using the Nadir point as the reference point
and filtering out solutions that do not dominate the Nadir point,
* `nadirPt()` calculates the Nadir point for the current problem,
* `numPts(pts)` calculates the number of solution points that dominate
the Nadir point, and

"""

def __init__(self, p_name="fashion_mnist"):
""" Read the current DTLZ problem defn from environment vars. """

self.p_name = p_name
multiobj = int(os.environ.get("DEEPHYPER_BENCHMARK_MOO", 1))
if multiobj:
self.nobjs = 3
else:
self.nobjs = 1

def hypervolume(self, pts):
""" Calculate the hypervolume dominated by soln, wrt the Nadir point.

Args:
pts (numpy.ndarray): A 2d array of objective values.
Each row is an objective value in the solution set.

Returns:
float: The total hypervolume dominated by the current solution,
filtering out points worse than the Nadir point and using the
Nadir point as the reference.

"""

if self.nobjs < 2:
raise ValueError("Cannot calculate hypervolume for 1 objective")
if pts.size > 0 and pts[0, 0] > 0:
filtered_pts = -pts.copy()
else:
filtered_pts = pts.copy()
nadir = self.nadirPt()
for i in range(pts.shape[0]):
if np.any(filtered_pts[i, :] > nadir):
filtered_pts[i, :] = nadir
return hypervolume(filtered_pts, nadir)

def nadirPt(self):
""" Calculate the Nadir point for the given problem definition. """

if self.p_name in ["cifar10", "colorectal_history", "fashion_mnist"]:
nadir = np.ones(self.nobjs)
nadir[0] = 0
if self.nobjs > 1:
nadir[1] = 10.0
nadir[2] = 100.0
return nadir
else:
raise ValueError(f"{self.p_name} is not a valid problem")

def numPts(self, pts):
""" Calculate the number of solutions that dominate the Nadir point.

Args:
pts (numpy.ndarra): A 2d array of objective values.
Each row is an objective value in the solution set.

Returns:
int: The number of fi in pts such that all(fi < self.nadirPt).

"""

if np.any(pts < 0):
pareto_pts = pareto_front(-pts)
else:
pareto_pts = pareto_front(pts)
return sum([all(fi <= self.nadirPt()) for fi in pareto_pts])


if __name__ == "__main__":
""" Driver code to test performance metrics. """

result = np.array([[80, -8, -10], [90, -9, -90], [10, -9.1, -99], [99.0, -1.0, -200.0]])

evaluator = PerformanceEvaluator()

assert abs(evaluator.hypervolume(result) - 14500) < 1.0e-8
assert evaluator.numPts(result) == 2
assert np.all(np.abs(evaluator.nadirPt() - np.array([0, 10, 100]))
< 1.0e-8)