[Feat] Add Bayesian Optimization comparison experiment (#29)
* [Config] Add representative workloads
* [Bug] Fix off-by-one error in all categorical values
* [Feat] Change callback functions on trainer to return state
ephoris authored Aug 29, 2024
1 parent 669e3f4 commit ad59bca
Showing 13 changed files with 362 additions and 27 deletions.
6 changes: 4 additions & 2 deletions endure.py
@@ -10,6 +10,7 @@
from jobs.ltune_train import LTuneTrainJob
from jobs.botorch_bo import BayesianPipeline
from jobs.mlos_bo import BayesianPipelineMlos
+ from jobs.mlos_exp_runs import ExperimentMLOS


class EndureDriver:
@@ -20,7 +21,7 @@ def __init__(self, config: dict[str, Any]) -> None:
format=config["log"]["format"], datefmt=config["log"]["datefmt"]
)
self.log: logging.Logger = logging.getLogger(config["log"]["name"])
- self.log.setLevel(logging.getLevelName(config["log"]["level"]))
+ self.log.setLevel(getattr(logging, config["log"]["level"]))
log_level = logging.getLevelName(self.log.getEffectiveLevel())
self.log.debug(f"Log level: {log_level}")

@@ -33,12 +34,13 @@ def run(self):
"LTuneTrain": LTuneTrainJob,
"BayesianPipelineBoTorch": BayesianPipeline,
"BayesianPipelineMLOS": BayesianPipelineMlos,
"ExperimentMLOS": ExperimentMLOS,
}
jobs_list = self.config["app"]["run"]
for job_name in jobs_list:
job = jobs.get(job_name, None)
if job is None:
- self.log.warn(f"No job associated with {job_name}")
+ self.log.warning(f"No job associated with {job_name}")
continue
job = job(config)
_ = job.run()
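Note: `Logger.warn` is a deprecated alias for `Logger.warning`. The level fix is subtler: `logging.getLevelName` maps both ways (name to number and number to name) and returns the string `"Level X"` rather than raising for an unknown name, so a typo in the config would set a bogus level silently. A minimal sketch of the difference, using only the standard `logging` module:

```python
import logging

log = logging.getLogger("demo")

# getLevelName maps both directions: "DEBUG" -> 10 and 10 -> "DEBUG".
# For an unknown name it returns "Level VERBOSE" instead of raising.
print(logging.getLevelName("DEBUG"))    # 10
print(logging.getLevelName("VERBOSE"))  # "Level VERBOSE" -- silently bogus

# getattr fails loudly on a config typo, which is preferable at startup.
log.setLevel(getattr(logging, "DEBUG"))      # fine
# log.setLevel(getattr(logging, "VERBOSE"))  # raises AttributeError
```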
111 changes: 111 additions & 0 deletions endure.toml
@@ -25,6 +25,7 @@ run = [
# "LTuneTrain",
# "BayesianPipelineBoTorch",
# "BayesianPipelineMLOS"
"ExperimentMLOS",
]

# =============================================================================
@@ -306,3 +307,113 @@ delta = 10

[loss.MSE]
reduction = 'mean'


# =============================================================================
# HEADER WORKLOADS
# List of representative workloads used for the testing suite
# =============================================================================
[[workloads]]
id = 0
z0 = 0.25
z1 = 0.25
q = 0.25
w = 0.25

[[workloads]]
id = 1
z0 = 0.97
z1 = 0.01
q = 0.01
w = 0.01

[[workloads]]
id = 2
z0 = 0.01
z1 = 0.97
q = 0.01
w = 0.01

[[workloads]]
id = 3
z0 = 0.01
z1 = 0.01
q = 0.97
w = 0.01

[[workloads]]
id = 4
z0 = 0.01
z1 = 0.01
q = 0.01
w = 0.97

[[workloads]]
id = 5
z0 = 0.49
z1 = 0.49
q = 0.01
w = 0.01

[[workloads]]
id = 6
z0 = 0.49
z1 = 0.01
q = 0.49
w = 0.01

[[workloads]]
id = 7
z0 = 0.49
z1 = 0.01
q = 0.01
w = 0.49

[[workloads]]
id = 8
z0 = 0.01
z1 = 0.49
q = 0.49
w = 0.01

[[workloads]]
id = 9
z0 = 0.01
z1 = 0.49
q = 0.01
w = 0.49

[[workloads]]
id = 10
z0 = 0.01
z1 = 0.01
q = 0.49
w = 0.49

[[workloads]]
id = 11
z0 = 0.33
z1 = 0.33
q = 0.33
w = 0.01

[[workloads]]
id = 12
z0 = 0.33
z1 = 0.33
q = 0.01
w = 0.33

[[workloads]]
id = 13
z0 = 0.33
z1 = 0.01
q = 0.33
w = 0.33

[[workloads]]
id = 14
z0 = 0.01
z1 = 0.33
q = 0.33
w = 0.33
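The 15 workloads sweep the (z0, z1, q, w) simplex: the uniform mix, four single-operation extremes, six pairwise mixes, and four three-way mixes, each summing to 1.0. A minimal sketch for loading and sanity-checking the table, assuming the config above is saved as `endure.toml` (`tomllib` requires Python 3.11+):

```python
import tomllib

with open("endure.toml", "rb") as f:
    config = tomllib.load(f)

# Every representative workload should be a valid point on the simplex.
for wl in config["workloads"]:
    total = wl["z0"] + wl["z1"] + wl["q"] + wl["w"]
    assert abs(total - 1.0) < 1e-6, f"workload {wl['id']} sums to {total}"
```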
2 changes: 1 addition & 1 deletion endure/lcm/data/dataset.py
@@ -26,7 +26,7 @@ def __init__(
self._shuffle: bool = shuffle
self.max_levels = bounds.max_considered_levels
self.min_size_ratio, self.max_size_ratio = bounds.size_ratio_range
- self.categories = self.max_size_ratio - self.min_size_ratio + 1
+ self.categories = self.max_size_ratio - self.min_size_ratio
# When in testing mode we transform input features to one hot encoded
self.test_mode = test
self.bounds = bounds
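Dropping the `+ 1` is the off-by-one fix named in the commit message: the number of categorical size-ratio values becomes `max_size_ratio - min_size_ratio`, which reads the range as half-open (upper bound exclusive). A sketch of the indexing convention that count supports, assuming that half-open reading:

```python
import torch

# With a half-open range [min_t, max_t), valid indices are 0 .. categories-1.
min_t, max_t = 2, 12
categories = max_t - min_t            # 10 categorical values
idx = torch.tensor(7)                 # size ratio T = min_t + 7 = 9
one_hot = torch.nn.functional.one_hot(idx, num_classes=categories)
assert one_hot.shape == (categories,)
```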
2 changes: 1 addition & 1 deletion endure/lcm/model/builder.py
@@ -29,7 +29,7 @@ def __init__(
self.dropout = dropout
self.max_levels = max_levels
self.size_ratio_min, self.size_ratio_max = size_ratio_range
- self.capacity_range = self.size_ratio_max - self.size_ratio_min + 1
+ self.capacity_range = self.size_ratio_max - self.size_ratio_min

self.norm_layer = nn.BatchNorm1d
if norm_layer == "Layer":
10 changes: 5 additions & 5 deletions endure/lcm/util/util.py
@@ -42,22 +42,22 @@ def create_input_from_types(
min_t: int,
max_t: int,
) -> Tensor:
- categories = max_t - min_t + 1
+ categories = max_t - min_t
wl = [z0, z1, q, w]
sys = [system.B, system.s, system.E, system.H, system.N]
- size_ratio = design.T - min_t
+ size_ratio_idx = design.T - min_t
if design.policy in (Policy.Tiering, Policy.Leveling):
- inputs = wl + sys + [design.h, size_ratio, design.policy.value]
+ inputs = wl + sys + [design.h, size_ratio_idx, design.policy.value]
data = torch.Tensor(inputs)
out = one_hot_lcm_classic(data, categories)
elif design.policy == Policy.KHybrid:
ks = [k - 1 if k > 0 else 0 for k in design.K]
- inputs = wl + sys + [design.h, size_ratio] + ks
+ inputs = wl + sys + [design.h, size_ratio_idx] + ks
data = torch.Tensor(inputs)
num_feats = 1 + len(design.K)
out = one_hot_lcm(data, len(inputs), num_feats, categories)
else: # design.policy == Policy.QFixed
- inputs = wl + sys + [design.h, size_ratio, design.Q - 1]
+ inputs = wl + sys + [design.h, size_ratio_idx, design.Q - 1]
data = torch.Tensor(inputs)
out = one_hot_lcm(data, len(inputs), 2, categories)

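`one_hot_lcm` and `one_hot_lcm_classic` are not shown in this diff; a hypothetical sketch (not the repository's implementation) of what such a helper might do, assuming the final entry of `data` is the categorical index to expand against `categories` classes:

```python
import torch
from torch import Tensor

def one_hot_tail(data: Tensor, categories: int) -> Tensor:
    # Keep the leading continuous features; one-hot expand the last one.
    continuous, idx = data[:-1], data[-1].long()
    encoded = torch.nn.functional.one_hot(idx, num_classes=categories).float()
    return torch.cat([continuous, encoded])
```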
9 changes: 5 additions & 4 deletions endure/lsm/solver/util.py
@@ -54,17 +54,18 @@ def get_bounds(
h_bounds = get_h_bounds(bounds, system)

lb = (h_bounds[0], t_bounds[0])
- ub = (h_bounds[1], t_bounds[1])
+ ub = (h_bounds[1], t_bounds[1] - 1)
+ # Because scipy optimizer bounds are INCLUSIVE on the right-hand side, we subtract 2
if policy == Policy.QFixed:
lb += (t_bounds[0] - 1,)
- ub += (t_bounds[1] - 1,)
+ ub += (t_bounds[1] - 2,)
elif policy == Policy.YZHybrid:
lb += (t_bounds[0] - 1, t_bounds[0] - 1)
- ub += (t_bounds[1] - 1, t_bounds[1] - 1)
+ ub += (t_bounds[1] - 2, t_bounds[1] - 1)
elif policy == Policy.KHybrid:
max_levels: int = bounds.max_considered_levels
lb += tuple(t_bounds[0] - 1 for _ in range(max_levels))
- ub += tuple(t_bounds[1] - 1 for _ in range(max_levels))
+ ub += tuple(t_bounds[1] - 2 for _ in range(max_levels))
elif policy in (Policy.Tiering, Policy.Leveling):
pass # No need to add more items for classic policy

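The `- 2` upper bounds follow from two facts: the category count is now `t_bounds[1] - t_bounds[0]` (matching the dataset fix above), and `scipy.optimize` bounds are inclusive on both ends, so an index over `categories` values must stop at `categories - 1`. A minimal sketch of that inclusivity with a toy objective:

```python
import numpy as np
from scipy.optimize import Bounds, minimize

# scipy bounds are inclusive: an index over `categories` values needs
# ub = categories - 1, not categories.
categories = 10
bounds = Bounds(lb=[0.0], ub=[categories - 1.0])
res = minimize(lambda x: (x[0] - 42.0) ** 2, x0=np.array([1.0]), bounds=bounds)
print(res.x[0])  # 9.0 -- clamped to the inclusive upper bound
```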
2 changes: 1 addition & 1 deletion endure/ltune/loss.py
@@ -40,7 +40,7 @@ def __init__(self, config: dict[str, Any], model_path: str):
)
status = self.model.load_state_dict(data)
self.capacity_range = (
- self.bounds.size_ratio_range[1] - self.bounds.size_ratio_range[0] + 1
+ self.bounds.size_ratio_range[1] - self.bounds.size_ratio_range[0]
)
self.num_levels = self.bounds.max_considered_levels

2 changes: 1 addition & 1 deletion endure/ltune/model/builder.py
@@ -24,7 +24,7 @@ def __init__(
self.categorical_mode = categorical_mode
self.max_levels = max_levels
self.size_ratio_min, self.size_ratio_max = size_ratio_range
- self.capacity_range = self.size_ratio_max - self.size_ratio_min + 1
+ self.capacity_range = self.size_ratio_max - self.size_ratio_min

self.norm_layer = nn.BatchNorm1d
if norm_layer == "Layer":
7 changes: 4 additions & 3 deletions endure/ltune/util/ltune_eval.py
@@ -33,7 +33,7 @@ def __init__(
self.design_type = design_type

def calc_size_ratio_range(self) -> int:
- return self.max_t - self.min_t + 1
+ return self.max_t - self.min_t

def eval_lcm(
self,
@@ -74,12 +74,13 @@ def get_ltune_out(
q: float,
w: float,
temp=1e-2,
- hard=False,
+ hard=True,
) -> Tensor:
x = torch.Tensor([z0, z1, q, w, system.B, system.s,
system.E, system.H, system.N])
x = x.view(1, -1)
- out = self.model(x, temp=temp, hard=hard)
+ with torch.no_grad():
+     out = self.model(x, temp=temp, hard=hard)

return out

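Two behavioral changes: evaluation now defaults to `hard=True`, producing a discrete one-hot choice via the straight-through trick rather than a soft relaxed sample, and the forward pass runs under `torch.no_grad()` so no autograd graph is built during evaluation. The `hard` flag mirrors PyTorch's own Gumbel-softmax API:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(1, 10)

# hard=False: a soft relaxed sample (a probability vector, not a choice).
soft = F.gumbel_softmax(logits, tau=1e-2, hard=False)

# hard=True: a one-hot sample; gradients still flow through the soft one.
hard = F.gumbel_softmax(logits, tau=1e-2, hard=True)
assert hard.sum() == 1 and (hard == hard.round()).all()
```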
4 changes: 2 additions & 2 deletions endure/util/trainer.py
@@ -25,7 +25,7 @@ def __init__(
model_test_kwargs: dict[str, Any] = {},
disable_tqdm: bool = False,
no_checkpoint: bool = False,
- train_callback: Optional[Callable[[dict], None]] = None,
+ train_callback: Optional[Callable[[dict], dict]] = None,
) -> None:
self.log = log
self.model = model
@@ -95,7 +95,7 @@ def _train_loop(self) -> float:
self.scheduler.step()

if self.train_callback is not None:
- self.train_callback(self.model_train_kwargs)
+ self.model_train_kwargs = self.train_callback(self.model_train_kwargs)

if self.train_len == 0:
self.train_len = batch + 1
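The callback contract changes from mutate-in-place (`Callable[[dict], None]`) to return-the-new-state (`Callable[[dict], dict]`), and the trainer now reassigns `model_train_kwargs` from the return value. A minimal sketch of the pattern, with a hypothetical `decay_temp` callback:

```python
from typing import Callable

def decay_temp(train_kwargs: dict) -> dict:
    # Return the updated state instead of relying on in-place mutation.
    train_kwargs["temp"] = max(train_kwargs["temp"] * 0.95, 0.01)
    return train_kwargs

callback: Callable[[dict], dict] = decay_temp
train_kwargs = {"temp": 1.0}
for _ in range(3):
    train_kwargs = callback(train_kwargs)  # reassignment is the key change
print(train_kwargs["temp"])  # 0.857375
```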
12 changes: 5 additions & 7 deletions jobs/ltune_train.py
@@ -120,28 +120,26 @@ def gumbel_temp_schedule(
train_kwargs: dict,
decay_rate: float = 0.95,
floor: float = 0.01,
- ) -> None:
+ ) -> dict:
train_kwargs["temp"] *= decay_rate
if train_kwargs["temp"] < floor:
train_kwargs["temp"] = floor

- return
+ return train_kwargs

@staticmethod
def reinmax_temp_schedule(
train_kwargs: dict,
decay_rate: float = 0.9,
floor: float = 1,
- ) -> None:
+ ) -> dict:
train_kwargs["temp"] *= decay_rate
if train_kwargs["temp"] < floor:
train_kwargs["temp"] = floor

- return
+ return train_kwargs

- def get_train_callback(self) -> Optional[Callable[[dict], None]]:
-     if not self.design == Policy.KHybrid:
-         return None
+ def get_train_callback(self) -> Optional[Callable[[dict], dict]]:
if self.config["ltune"]["model"]["categorical_mode"] == "reinmax":
return lambda train_kwargs: self.reinmax_temp_schedule(train_kwargs)
# default train_callback will be gumbel softmax
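Both schedules are geometric decay clamped at a floor, and removing the KHybrid-only guard means every design now gets a temperature callback. Assuming an initial temperature of 1.0 (not shown in this diff), the Gumbel schedule reaches its 0.01 floor after roughly 90 invocations:

```python
import math

# 0.95**n <= 0.01  =>  n >= ln(0.01) / ln(0.95) ~= 89.8
steps = math.ceil(math.log(0.01) / math.log(0.95))
print(steps)  # 90
```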