Skip to content

Commit

Permalink
Merge pull request #154 from optimas-org/feature/failed_trials
Browse files Browse the repository at this point in the history
Improve handling of failed evaluations
  • Loading branch information
RemiLehe authored Feb 12, 2024
2 parents c354790 + 43b7795 commit 48d42fc
Show file tree
Hide file tree
Showing 18 changed files with 473 additions and 136 deletions.
3 changes: 2 additions & 1 deletion optimas/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .evaluation import Evaluation
from .parameter import Parameter, VaryingParameter, TrialParameter, Objective
from .task import Task
from .trial import Trial
from .trial import Trial, TrialStatus


__all__ = [
Expand All @@ -12,4 +12,5 @@
"Objective",
"Task",
"Trial",
"TrialStatus",
]
49 changes: 42 additions & 7 deletions optimas/core/trial.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
"""Contains the definition of the Trial class."""

from typing import List, Dict, Optional
from enum import Enum

import numpy as np

from .parameter import VaryingParameter, Objective, Parameter, TrialParameter
from .evaluation import Evaluation


class TrialStatus(int, Enum):
"""Enum of trial status, based on the Ax implementation."""

CANDIDATE = 0
RUNNING = 1
COMPLETED = 2
FAILED = 3


class Trial:
"""Defines a trial to be evaluated.
Expand Down Expand Up @@ -67,6 +77,7 @@ def __init__(
self._mapped_evaluations[par.name] = None
for ev in evaluations:
self._mapped_evaluations[ev.parameter.name] = ev
self.mark_as(TrialStatus.CANDIDATE)

@property
def varying_parameters(self) -> List[VaryingParameter]:
Expand Down Expand Up @@ -121,6 +132,36 @@ def custom_parameters(self) -> List[TrialParameter]:
"""Get the list of custom trial parameters."""
return self._custom_parameters

@property
def status(self) -> TrialStatus:
"""Get current trial status."""
return self._status

@property
def completed(self) -> bool:
"""Determine whether the trial has been successfully evaluated."""
return self._status == TrialStatus.COMPLETED

@property
def failed(self) -> bool:
"""Determine whether the trial evaluation has failed."""
return self._status == TrialStatus.FAILED

@property
def evaluated(self) -> bool:
"""Determine whether the trial has been evaluated."""
return self.completed or self.failed

def mark_as(self, status) -> None:
"""Set trial status.
Parameters
----------
status : int
A valid trial status (use ``TrialStatus`` enum).
"""
self._status = status

def complete_evaluation(self, evaluation: Evaluation) -> None:
"""Complete the evaluation of an objective or analyzed parameter.
Expand All @@ -134,6 +175,7 @@ def complete_evaluation(self, evaluation: Evaluation) -> None:
assert evaluated_parameter in self._mapped_evaluations
if self._mapped_evaluations[evaluated_parameter] is None:
self._mapped_evaluations[evaluated_parameter] = evaluation
self.mark_as(TrialStatus.COMPLETED)

def parameters_as_dict(self) -> Dict:
"""Get a mapping between names and values of the varying parameters."""
Expand Down Expand Up @@ -165,10 +207,3 @@ def analyzed_parameters_as_dict(self) -> Dict:
ev = self._mapped_evaluations[par.name]
params[par.name] = (ev.value, ev.sem)
return params

def completed(self) -> bool:
"""Determine whether the trial has been completed."""
for par, ev in self._mapped_evaluations.items():
if ev is None:
return False
return True
11 changes: 8 additions & 3 deletions optimas/evaluators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ class Evaluator:
n_gpus : int, optional
The number of GPUs that will be made available for each evaluation. By
default, 0.
fail_on_nan : bool, optional
Whether to mark an evaluation as failed if the value of any of the
objectives is NaN. By default, ``True``.
"""

Expand All @@ -29,6 +32,7 @@ def __init__(
sim_function: Callable,
n_procs: Optional[int] = None,
n_gpus: Optional[int] = None,
fail_on_nan: Optional[bool] = True,
) -> None:
self.sim_function = sim_function
# If no resources are specified, use 1 CPU an 0 GPUs.
Expand All @@ -44,6 +48,7 @@ def __init__(
n_gpus = 0
self._n_procs = n_procs
self._n_gpus = n_gpus
self._fail_on_nan = fail_on_nan
self._initialized = False

def get_sim_specs(
Expand All @@ -68,14 +73,14 @@ def get_sim_specs(
"in": [var.name for var in varying_parameters],
"out": (
[(obj.name, obj.dtype) for obj in objectives]
# f is the single float output that LibEnsemble minimizes.
+ [(par.name, par.dtype) for par in analyzed_parameters]
# input parameters
+ [(var.name, var.dtype) for var in varying_parameters]
+ [("trial_status", str, 10)]
),
"user": {
"n_procs": self._n_procs,
"n_gpus": self._n_gpus,
"fail_on_nan": self._fail_on_nan,
"objectives": [obj.name for obj in objectives],
},
}
return sim_specs
Expand Down
4 changes: 1 addition & 3 deletions optimas/evaluators/multitask_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,8 @@ def get_sim_specs(
self.tasks[0].name: sim_specs_1["user"],
self.tasks[1].name: sim_specs_2["user"],
}
# Add task name to sim_specs in and out.
task_len = max([len(self.tasks[0].name), len(self.tasks[1].name)])
# Add task name to sim_specs in.
sim_specs["in"].append("task")
sim_specs["out"].append(("task", str, task_len))
return sim_specs

def get_libe_specs(self) -> Dict:
Expand Down
33 changes: 29 additions & 4 deletions optimas/explorations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens
from libensemble.executors.mpi_executor import MPIExecutor

from optimas.core.trial import TrialStatus
from optimas.generators.base import Generator
from optimas.evaluators.base import Evaluator
from optimas.evaluators.function_evaluator import FunctionEvaluator
Expand Down Expand Up @@ -128,7 +129,7 @@ def __init__(
def history(self) -> pd.DataFrame:
"""Get the exploration history."""
history = convert_to_dataframe(self._libe_history.H)
ordered_columns = ["trial_index"]
ordered_columns = ["trial_index", "trial_status"]
ordered_columns += [p.name for p in self.generator.varying_parameters]
ordered_columns += [p.name for p in self.generator.objectives]
ordered_columns += [p.name for p in self.generator.analyzed_parameters]
Expand All @@ -147,6 +148,13 @@ def run(self, n_evals: Optional[int] = None) -> None:
run until the number of evaluations reaches `max_evals`.
"""
# Store current working directory. It has been observed that sometimes
# (especially when using `local_threading`) the working directory
# is changed to the exploration directory after the call to `libE`.
# As a workaround, the cwd is stored and then set again at the end of
# `run`.
cwd = os.getcwd()

# Set exit criteria to maximum number of evaluations.
remaining_evals = self.max_evals - self._n_evals
if remaining_evals < 1:
Expand All @@ -162,7 +170,7 @@ def run(self, n_evals: Optional[int] = None) -> None:
exit_criteria["sim_max"] = sim_max

# Get initial number of generator trials.
n_evals_initial = self.generator.n_completed_trials
n_evals_initial = self.generator.n_evaluated_trials

# Create persis_info.
persis_info = add_unique_random_streams({}, self.sim_workers + 2)
Expand Down Expand Up @@ -209,8 +217,11 @@ def run(self, n_evals: Optional[int] = None) -> None:
self.generator._update(persis_info[1]["generator"])

# Update number of evaluation in this exploration.
n_trials_final = self.generator.n_completed_trials
self._n_evals += n_trials_final - n_evals_initial
n_evals_final = self.generator.n_evaluated_trials
self._n_evals += n_evals_final - n_evals_initial

# Reset `cwd` to initial value before `libE` was called.
os.chdir(cwd)

def attach_trials(
self,
Expand Down Expand Up @@ -420,10 +431,24 @@ def attach_evaluations(
self.generator._trial_count + n_evals,
dtype=int,
)
if "trial_status" not in fields:
history_new["trial_status"] = TrialStatus.COMPLETED.name

# Incorporate new history into generator.
self.generator.incorporate_history(history_new)

def mark_evaluation_as_failed(self, trial_index):
"""Mark an already evaluated trial as failed.
Parameters
----------
trial_index : int
The index of the trial.
"""
self.generator.mark_trial_as_failed(trial_index)
i = np.where(self._libe_history.H["trial_index"] == trial_index)[0][0]
self._libe_history.H[i]["trial_status"] = TrialStatus.FAILED.name

def _create_executor(self) -> None:
"""Create libEnsemble executor."""
self.executor = MPIExecutor()
Expand Down
13 changes: 9 additions & 4 deletions optimas/gen_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from libensemble.resources.resources import Resources

from optimas.core import Evaluation
from optimas.core.trial import TrialStatus


def persistent_generator(H, persis_info, gen_specs, libE_info):
Expand Down Expand Up @@ -99,11 +100,15 @@ def persistent_generator(H, persis_info, gen_specs, libE_info):
# Update the GP with latest simulation results
for i in range(n):
trial_index = int(calc_in["trial_index"][i])
trial_status = calc_in["trial_status"][i]
trial = generator.get_trial(trial_index)
for par in objectives + analyzed_parameters:
y = calc_in[par.name][i]
ev = Evaluation(parameter=par, value=y)
trial.complete_evaluation(ev)
if trial_status == TrialStatus.FAILED.name:
trial.mark_as(TrialStatus.FAILED)
else:
for par in objectives + analyzed_parameters:
y = calc_in[par.name][i]
ev = Evaluation(parameter=par, value=y)
trial.complete_evaluation(ev)
# Register trial with unknown SEM
generator.tell([trial])
# Set the number of points to generate to that number:
Expand Down
25 changes: 16 additions & 9 deletions optimas/generators/ax/developer/multitask.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
Parameter,
Task,
Trial,
TrialStatus,
)
from .ax_metric import AxMetric

Expand Down Expand Up @@ -225,10 +226,13 @@ def _incorporate_external_data(self, trials: List[Trial]) -> None:
ax_trial.run()
# Incorporate observations.
for trial in trials_i:
objective_eval = {}
oe = trial.objective_evaluations[0]
objective_eval["f"] = (oe.value, oe.sem)
ax_trial.run_metadata[trial.arm_name] = objective_eval
if trial.status != TrialStatus.FAILED:
objective_eval = {}
oe = trial.objective_evaluations[0]
objective_eval["f"] = (oe.value, oe.sem)
ax_trial.run_metadata[trial.arm_name] = objective_eval
else:
ax_trial.mark_arm_abandoned(trial.arm_name)
# Mark batch trial as completed.
ax_trial.mark_completed()
# Keep track of high-fidelity trials.
Expand All @@ -245,10 +249,13 @@ def _complete_evaluations(self, trials: List[Trial]) -> None:
"External data can only be loaded into generator before "
"initialization."
)
objective_eval = {}
oe = trial.objective_evaluations[0]
objective_eval["f"] = (oe.value, oe.sem)
self.current_trial.run_metadata[trial.arm_name] = objective_eval
if trial.status != TrialStatus.FAILED:
objective_eval = {}
oe = trial.objective_evaluations[0]
objective_eval["f"] = (oe.value, oe.sem)
self.current_trial.run_metadata[trial.arm_name] = objective_eval
else:
self.current_trial.mark_arm_abandoned(trial.arm_name)
if trial.trial_type == self.lofi_task.name:
self.returned_lofi_trials += 1
if self.returned_lofi_trials == self.n_gen_lofi:
Expand Down Expand Up @@ -447,7 +454,7 @@ def _save_model_to_file(self) -> None:
file_path = os.path.join(
self._model_history_dir,
"ax_experiment_at_eval_{}.json".format(
self._n_completed_trials_last_saved
self._n_evaluated_trials_last_saved
),
)
save_experiment(
Expand Down
5 changes: 5 additions & 0 deletions optimas/generators/ax/service/ax_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ class AxClientGenerator(AxServiceGenerator):
analyzed_parameters : list of Parameter, optional
List of parameters to analyze at each trial, but which are not
optimization objectives. By default ``None``.
abandon_failed_trials : bool, optional
Whether failed trials should be abandoned (i.e., not suggested again).
By default, ``True``.
gpu_id : int, optional
The ID of the GPU in which to run the generator. By default, ``0``.
This parameter will only have an effect if any ``GenerationStep`` in
Expand Down Expand Up @@ -61,6 +64,7 @@ def __init__(
self,
ax_client: AxClient,
analyzed_parameters: Optional[List[Parameter]] = None,
abandon_failed_trials: Optional[bool] = True,
gpu_id: Optional[int] = 0,
dedicated_resources: Optional[bool] = False,
save_model: Optional[bool] = True,
Expand All @@ -79,6 +83,7 @@ def __init__(
objectives=objectives,
analyzed_parameters=analyzed_parameters,
enforce_n_init=True,
abandon_failed_trials=abandon_failed_trials,
use_cuda=use_cuda,
gpu_id=gpu_id,
dedicated_resources=dedicated_resources,
Expand Down
Loading

0 comments on commit 48d42fc

Please sign in to comment.