diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index db4e7bc..0000000 --- a/.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -Dockerfile -README.md -notebook -.git -.gitignore diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index f8e0528..0000000 --- a/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM python:3.9-buster - -# Create arguments to set the default local user information -ARG CONTAINER_USER=ndhuynh -ARG CONTAINER_GROUP=cs-grad -ARG CONTAINER_UID=263606 -ARG CONTAINER_GID=3935 - -# Create a non-root user for the server -RUN groupadd -g ${CONTAINER_GID} ${CONTAINER_GROUP} -RUN useradd -l -ms /bin/bash ${CONTAINER_USER} -u ${CONTAINER_UID} -g ${CONTAINER_GID} - -WORKDIR /endure -COPY requirements.txt /tmp -COPY notebook/requirements.txt /tmp/jupyter-requirements.txt - -USER root -RUN apt-get update && apt-get install -y vim tmux - -USER ${CONTAINER_USER} -ENV PATH "/home/${CONTAINER_USER}/.local/bin:$PATH" -RUN pip install -r /tmp/requirements.txt -RUN pip install -r /tmp/jupyter-requirements.txt - -EXPOSE 8888 - -ENTRYPOINT ["bash"] diff --git a/README.md b/README.md index 9f944a4..4ff628d 100644 --- a/README.md +++ b/README.md @@ -46,16 +46,16 @@ These are the general steps that can be used to generate data, train each model ```bash pip install -r requirements.txt ``` -8. **Configure the endure.toml file (More details can be found below)** \ - The [endure.toml](#Configuration-File) file contains all required options for the experiments and jobs. Please configure all the jobs and their respective parameters required to successfully run the project. +8. **Configure the axe.toml file (More details can be found below)** \ + The [axe.toml](#Configuration-File) file contains all required options for the experiments and jobs. Please configure all the jobs and their respective parameters required to successfully run the project. 9. **Run the project** \ - The project is structured such that all experiments can be run by just running the endure.py file. Use the following command to run it: + The project is structured such that all experiments can be run by just running the axe.py file. Use the following command to run it: ```bash - python endure.py + python axe.py ``` ## Configuration File -All of our configuration is handled by the endure.toml file placed in the root directory of our project. +All of our configuration is handled by the axe.toml file placed in the root directory of our project. The config file is divided into sections denoted by a heading within the square brackets (for example *[app]*) 1. To run a specific job, change the variable **run** under the *[app]* header. All the available jobs are already present in the variable and provided as commented out code. Uncomment all jobs that have to be run to run multiple jobs or a single job. @@ -76,14 +76,14 @@ The config file is divided into sections denoted by a heading within the square ## Project Structure Our project is separated into repositories to maintain a consistent structure. Below is a short description for each of the repositories to help any user of this project: -1. **jobs** - This repo contains the main entry point defined in endure.py for each job. There are separate files dedicated to each job that can be identified using the filenames. +1. **jobs** - This repo contains the main entry point defined in axe.py for each job. There are separate files dedicated to each job that can be identified using the filenames. -2. 
**endure** - This repo contains multiple other repositories divided by use case as defined below: +2. **axe** - This repo contains multiple other repositories divided by use case as defined below: -  a. **endure/lcm** - This repo contains the folders and files responsible for the *Learned Cost Model (LCM)* that helps learn the cost surface for the implemented solution. Within this directory, there is a *model* folder that contains the files for each LCM model structure (Classic, KLSM, QHybrid, Doestoevsky (YZLSM)). The *util* folder contains all utilities used by the models within *lcm*. The *data* folder contains code required for generating and using the input (data) files by the models. +  a. **axe/lcm** - This repo contains the folders and files responsible for the *Learned Cost Model (LCM)* that helps learn the cost surface for the implemented solution. Within this directory, there is a *model* folder that contains the files for each LCM model structure (Classic, KLSM, QHybrid, Dostoevsky (YZLSM)). The *util* folder contains all utilities used by the models within *lcm*. The *data* folder contains code required for generating and using the input (data) files by the models. -  b. **endure/lsm** - This repo contains the analytical solver repository called *solver* that is used as the basis of comparison as well as files associated with the structure of the lsm. It has a *types.py* file that is used throughout our project to define each type used. The project also has cost files that use the equations from the cost model (as stated in the paper) to calculate the cost of each operation for all models. There is a also a data_generator file that is used for generation of data from a given sample space uniformly at random. +  b. **axe/lsm** - This repo contains the analytical solver repository called *solver* that is used as the basis of comparison, as well as files associated with the structure of the LSM. It has a *types.py* file that defines each type used throughout our project. The project also has cost files that use the equations from the cost model (as stated in the paper) to calculate the cost of each operation for all models. There is also a data_generator file that generates data uniformly at random from a given sample space. -  c. **endure/ltune** - This repo contains the folders and files responsible for the *Learned Tuner (LTuner)* that helps predict the best configuration for the LSM Tree as per the solution proposed in the paper. Within this directory, there is a *model* folder that contains the files for each Ltuner model structure (Classic, KLSM, QHybrid, Doestoevsky (YZLSM)). The *util* folder contains all utilities used by the models within *ltune*. The *data* folder contains code required for generating and using the input (data) files by the models. +  c. **axe/ltune** - This repo contains the folders and files responsible for the *Learned Tuner (LTuner)* that helps predict the best configuration for the LSM Tree as per the solution proposed in the paper. Within this directory, there is a *model* folder that contains the files for each LTuner model structure (Classic, KLSM, QHybrid, Dostoevsky (YZLSM)). The *util* folder contains all utilities used by the models within *ltune*. The *data* folder contains code required for generating and using the input (data) files by the models. -  d. **endure/util** - Contains utility files that are used generally by all modules. +  d. 
**axe/util** - Contains utility files that are used generally by all modules. diff --git a/endure.py b/axe.py old mode 100755 new mode 100644 similarity index 94% rename from endure.py rename to axe.py index c2011d0..327d944 --- a/endure.py +++ b/axe.py @@ -13,7 +13,7 @@ from jobs.mlos_exp_runs import ExperimentMLOS -class EndureDriver: +class AxeDriver: def __init__(self, config: dict[str, Any]) -> None: self.config = config @@ -53,10 +53,10 @@ def run(self): config_path = sys.argv[1] else: file_dir = os.path.dirname(__file__) - config_path = os.path.join(file_dir, "endure.toml") + config_path = os.path.join(file_dir, "axe.toml") with open(config_path) as fid: config = toml.load(fid) - driver = EndureDriver(config) + driver = AxeDriver(config) driver.run() diff --git a/endure.toml b/axe.toml similarity index 99% rename from endure.toml rename to axe.toml index 321cbdf..5f389f7 100644 --- a/endure.toml +++ b/axe.toml @@ -1,5 +1,5 @@ # ============================================================================= -# ENDURE Configuration File +# AXE Configuration File # Following subsections are available # APP # LOGGER - output setting @@ -18,7 +18,7 @@ # Logic of app including jobs list to run # ============================================================================= [app] -name = "ENDURE" +name = "AXE" run = [ # "DataGen", # "LCMTrain", @@ -33,7 +33,7 @@ run = [ # Generic IO settings for experiments, saving data, etc # ============================================================================= [log] -name = 'endure-logger' +name = 'axe-logger' format = "[%(levelname)s][%(asctime)-15s][%(filename)s] %(message)s" datefmt = '%d-%m-%y:%H:%M:%S' level = "DEBUG" diff --git a/endure/__init__.py b/axe/__init__.py similarity index 100% rename from endure/__init__.py rename to axe/__init__.py diff --git a/endure/data/__init__.py b/axe/data/__init__.py similarity index 100% rename from endure/data/__init__.py rename to axe/data/__init__.py diff --git a/endure/data/io.py b/axe/data/io.py similarity index 95% rename from endure/data/io.py rename to axe/data/io.py index ded62e2..5b0dea5 100644 --- a/endure/data/io.py +++ b/axe/data/io.py @@ -8,7 +8,7 @@ class Writer(object): def __init__(self, config): self._config = config - self.log = logging.getLogger("endure") + self.log = logging.getLogger("axe") def export_csv_file(self, df, filename): """ @@ -57,7 +57,7 @@ def __init__(self, config): """ self._config = config self.data_dir = self._config["io"]["data_dir"] - self.log = logging.getLogger("endure") + self.log = logging.getLogger("axe") @classmethod def read_config(cls, config_path): diff --git a/endure/lcm/__init__.py b/axe/lcm/__init__.py similarity index 100% rename from endure/lcm/__init__.py rename to axe/lcm/__init__.py diff --git a/endure/lcm/data/dataset.py b/axe/lcm/data/dataset.py similarity index 94% rename from endure/lcm/data/dataset.py rename to axe/lcm/data/dataset.py index 7d896ca..0cc6b2e 100644 --- a/endure/lcm/data/dataset.py +++ b/axe/lcm/data/dataset.py @@ -8,9 +8,9 @@ from torch import Tensor import torch.utils.data -from endure.lcm.data.input_features import kINPUT_FEATS_DICT, kOUTPUT_FEATS -from endure.lsm.types import LSMBounds, Policy -from endure.lcm.util import one_hot_lcm, one_hot_lcm_classic +from axe.lcm.data.input_features import kINPUT_FEATS_DICT, kOUTPUT_FEATS +from axe.lsm.types import LSMBounds, Policy +from axe.lcm.util import one_hot_lcm, one_hot_lcm_classic class LCMDataSet(torch.utils.data.IterableDataset): @@ -102,4 +102,4 @@ def __iter__(self): label, 
input = labels[idx], inputs[idx] if self.test_mode: input = self._transform_test_data(inputs[idx]) - yield label, input \ No newline at end of file + yield label, input diff --git a/endure/lcm/data/generator.py b/axe/lcm/data/generator.py similarity index 98% rename from endure/lcm/data/generator.py rename to axe/lcm/data/generator.py index e67b180..a4975a0 100644 --- a/endure/lcm/data/generator.py +++ b/axe/lcm/data/generator.py @@ -4,9 +4,9 @@ import numpy as np -from endure.lsm.types import LSMDesign, System, Policy, LSMBounds -from endure.lsm.cost import EndureCost -from endure.lcm.data.input_features import ( +from axe.lsm.types import LSMDesign, System, Policy, LSMBounds +from axe.lsm.cost import EndureCost +from axe.lcm.data.input_features import ( kWORKLOAD_HEADER, kSYSTEM_HEADER, kCOST_HEADER, diff --git a/endure/lcm/data/input_features.py b/axe/lcm/data/input_features.py similarity index 95% rename from endure/lcm/data/input_features.py rename to axe/lcm/data/input_features.py index 1845a4f..4adec75 100644 --- a/endure/lcm/data/input_features.py +++ b/axe/lcm/data/input_features.py @@ -1,4 +1,4 @@ -from endure.lsm.types import Policy +from axe.lsm.types import Policy kSYSTEM_HEADER = [ "entry_p_page", diff --git a/endure/lcm/model/__init__.py b/axe/lcm/model/__init__.py similarity index 100% rename from endure/lcm/model/__init__.py rename to axe/lcm/model/__init__.py diff --git a/endure/lcm/model/builder.py b/axe/lcm/model/builder.py similarity index 93% rename from endure/lcm/model/builder.py rename to axe/lcm/model/builder.py index 8690e8e..0a0b3cd 100644 --- a/endure/lcm/model/builder.py +++ b/axe/lcm/model/builder.py @@ -3,9 +3,9 @@ from torch import nn import torch -from endure.lcm.data.input_features import kINPUT_FEATS_DICT -from endure.lcm.model import KapModel, QModel, ClassicModel, YZModel -from endure.lsm.types import Policy +from axe.lcm.data.input_features import kINPUT_FEATS_DICT +from axe.lcm.model import KapModel, QModel, ClassicModel, YZModel +from axe.lsm.types import Policy class LearnedCostModelBuilder: @@ -78,4 +78,4 @@ def build_model(self, policy: Policy) -> torch.nn.Module: model = model_class(**args) - return model \ No newline at end of file + return model diff --git a/endure/lcm/model/classic_model.py b/axe/lcm/model/classic_model.py similarity index 100% rename from endure/lcm/model/classic_model.py rename to axe/lcm/model/classic_model.py diff --git a/endure/lcm/model/flexible_model.py b/axe/lcm/model/flexible_model.py similarity index 100% rename from endure/lcm/model/flexible_model.py rename to axe/lcm/model/flexible_model.py diff --git a/endure/lcm/model/kaplsm_model.py b/axe/lcm/model/kaplsm_model.py similarity index 100% rename from endure/lcm/model/kaplsm_model.py rename to axe/lcm/model/kaplsm_model.py diff --git a/endure/lcm/model/qlsm_model.py b/axe/lcm/model/qlsm_model.py similarity index 100% rename from endure/lcm/model/qlsm_model.py rename to axe/lcm/model/qlsm_model.py diff --git a/endure/lcm/model/yzlsm_model.py b/axe/lcm/model/yzlsm_model.py similarity index 100% rename from endure/lcm/model/yzlsm_model.py rename to axe/lcm/model/yzlsm_model.py diff --git a/endure/lcm/util/__init__.py b/axe/lcm/util/__init__.py similarity index 100% rename from endure/lcm/util/__init__.py rename to axe/lcm/util/__init__.py diff --git a/endure/lcm/util/lcm_evaluation.py b/axe/lcm/util/lcm_evaluation.py similarity index 92% rename from endure/lcm/util/lcm_evaluation.py rename to axe/lcm/util/lcm_evaluation.py index e8dd681..0c4894c 100644 --- 
a/endure/lcm/util/lcm_evaluation.py +++ b/axe/lcm/util/lcm_evaluation.py @@ -1,9 +1,9 @@ import torch from .util import eval_lcm_impl -from endure.lcm.data.generator import LCMDataGenerator -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMDesign, Policy, System +from axe.lcm.data.generator import LCMDataGenerator +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMDesign, Policy, System class LCMEvalUtil: def __init__( @@ -67,4 +67,4 @@ def gen_random_sample(self): row['cost_acm'] = cost_acm return row, design, system - \ No newline at end of file + diff --git a/endure/lcm/util/util.py b/axe/lcm/util/util.py similarity index 98% rename from endure/lcm/util/util.py rename to axe/lcm/util/util.py index b9931ee..15e71bb 100644 --- a/endure/lcm/util/util.py +++ b/axe/lcm/util/util.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from endure.lsm.types import LSMDesign, Policy, System +from axe.lsm.types import LSMDesign, Policy, System def one_hot_lcm( data: Tensor, diff --git a/endure/lsm/__init__.py b/axe/lsm/__init__.py similarity index 100% rename from endure/lsm/__init__.py rename to axe/lsm/__init__.py diff --git a/endure/lsm/cost.py b/axe/lsm/cost.py similarity index 97% rename from endure/lsm/cost.py rename to axe/lsm/cost.py index af9eb78..793409c 100644 --- a/endure/lsm/cost.py +++ b/axe/lsm/cost.py @@ -1,6 +1,6 @@ import numpy as np -import endure.lsm.lsm_cost as Cost -from endure.lsm.types import Policy, System, LSMDesign +import axe.lsm.lsm_cost as Cost +from axe.lsm.types import Policy, System, LSMDesign class EndureCost: diff --git a/endure/lsm/data_generator.py b/axe/lsm/data_generator.py similarity index 98% rename from endure/lsm/data_generator.py rename to axe/lsm/data_generator.py index 23e2b8c..89940fa 100644 --- a/endure/lsm/data_generator.py +++ b/axe/lsm/data_generator.py @@ -4,8 +4,8 @@ import numpy as np -from endure.lsm.types import LSMDesign, System, Policy, LSMBounds, Workload -from endure.lsm.cost import EndureCost +from axe.lsm.types import LSMDesign, System, Policy, LSMBounds, Workload +from axe.lsm.cost import EndureCost class LSMDataGenerator: diff --git a/endure/lsm/lsm_cost.py b/axe/lsm/lsm_cost.py similarity index 100% rename from endure/lsm/lsm_cost.py rename to axe/lsm/lsm_cost.py diff --git a/endure/lsm/solver/__init__.py b/axe/lsm/solver/__init__.py similarity index 94% rename from endure/lsm/solver/__init__.py rename to axe/lsm/solver/__init__.py index af148e6..6735f01 100644 --- a/endure/lsm/solver/__init__.py +++ b/axe/lsm/solver/__init__.py @@ -1,5 +1,5 @@ from typing import Type -from endure.lsm.types import Policy +from axe.lsm.types import Policy from .classic_solver import ClassicSolver from .qlsm_solver import QLSMSolver from .klsm_solver import KLSMSolver diff --git a/endure/lsm/solver/classic_solver.py b/axe/lsm/solver/classic_solver.py similarity index 93% rename from endure/lsm/solver/classic_solver.py rename to axe/lsm/solver/classic_solver.py index fe8cce5..fb258e7 100644 --- a/endure/lsm/solver/classic_solver.py +++ b/axe/lsm/solver/classic_solver.py @@ -1,10 +1,10 @@ -from typing import Any, Optional, Callable, Tuple, List +from typing import Optional, Callable, Tuple, List import numpy as np import scipy.optimize as SciOpt -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMDesign, Policy, System, LSMBounds +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMDesign, Policy, System, LSMBounds from .util import kl_div_con from .util import 
get_bounds @@ -15,11 +15,7 @@ class ClassicSolver: - def __init__( - self, - bounds: LSMBounds, - policies: Optional[List[Policy]] = None - ): + def __init__(self, bounds: LSMBounds, policies: Optional[List[Policy]] = None): self.bounds = bounds self.cf = EndureCost(bounds.max_considered_levels) if policies is None: diff --git a/endure/lsm/solver/klsm_solver.py b/axe/lsm/solver/klsm_solver.py similarity index 83% rename from endure/lsm/solver/klsm_solver.py rename to axe/lsm/solver/klsm_solver.py index 5ef6d68..c90331f 100644 --- a/endure/lsm/solver/klsm_solver.py +++ b/axe/lsm/solver/klsm_solver.py @@ -1,10 +1,10 @@ -from typing import Any, Optional, Callable, Tuple +from typing import Optional, Callable, Tuple import numpy as np import scipy.optimize as SciOpt -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMDesign, Policy, System, LSMBounds +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMDesign, Policy, System, LSMBounds from .util import kl_div_con, get_bounds from .util import H_DEFAULT, T_DEFAULT, LAMBDA_DEFAULT, ETA_DEFAULT, K_DEFAULT @@ -29,10 +29,8 @@ def robust_objective( lamb, eta = x[-2:] design = LSMDesign(h=h, T=t, K=kaps, policy=Policy.KHybrid) query_cost = 0 - query_cost += z0 * \ - kl_div_con((self.cf.Z0(design, system) - eta) / lamb) - query_cost += z1 * \ - kl_div_con((self.cf.Z1(design, system) - eta) / lamb) + query_cost += z0 * kl_div_con((self.cf.Z0(design, system) - eta) / lamb) + query_cost += z1 * kl_div_con((self.cf.Z1(design, system) - eta) / lamb) query_cost += q * kl_div_con((self.cf.Q(design, system) - eta) / lamb) query_cost += w * kl_div_con((self.cf.W(design, system) - eta) / lamb) cost = eta + (rho * lamb) + (lamb * query_cost) @@ -69,7 +67,7 @@ def get_robust_design( minimizer_kwargs: dict = {}, callback_fn: Optional[Callable] = None, ) -> Tuple[LSMDesign, SciOpt.OptimizeResult]: - raise NotImplemented + raise NotImplementedError def get_nominal_design( self, @@ -97,8 +95,7 @@ def get_nominal_design( default_kwargs.update(minimizer_kwargs) kap_val = init_args[-1] init_args = np.concatenate( - (init_args[0:2], - np.array([kap_val for _ in range(max_levels)])) + (init_args[0:2], np.array([kap_val for _ in range(max_levels)])) ) solution = SciOpt.minimize( @@ -108,10 +105,7 @@ def get_nominal_design( **default_kwargs ) design = LSMDesign( - h=solution.x[0], - T=solution.x[1], - K=solution.x[2:], - policy=Policy.KHybrid + h=solution.x[0], T=solution.x[1], K=solution.x[2:], policy=Policy.KHybrid ) return design, solution
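For readers skimming the `robust_objective` hunk above: `kl_div_con`, imported from `solver/util.py` (untouched by this diff beyond imports), is the convex conjugate of the KL divergence, presumably `exp(u) - 1`, and `eta + (rho * lamb) + (lamb * query_cost)` is the standard dual form of a worst-case expected cost over a KL ball of radius `rho` around the nominal workload. A minimal sketch of the pattern, with a hypothetical one-term cost standing in for the four workload-weighted terms `Z0`, `Z1`, `Q`, `W`:

```python
import numpy as np

def kl_div_con(u: float) -> float:
    # Convex conjugate of the KL divergence; assumption: this matches the
    # definition in solver/util.py.
    return np.exp(u) - 1.0

def robust_cost(nominal_cost: float, rho: float, lamb: float, eta: float) -> float:
    # Dual of the KL-constrained worst-case expectation: for fixed multipliers
    # (lamb, eta) this upper-bounds the worst-case expected cost; the solver
    # minimizes it jointly over the design variables and (lamb, eta), which sit
    # at x[-2:] in robust_objective above.
    return eta + (rho * lamb) + lamb * kl_div_con((nominal_cost - eta) / lamb)

# Toy usage: the hypothetical cost 1.2 stands in for the z0/z1/q/w-weighted
# sum of the per-operation costs.
print(robust_cost(1.2, rho=0.25, lamb=1.0, eta=0.9))
```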
diff --git a/endure/lsm/solver/qlsm_solver.py b/axe/lsm/solver/qlsm_solver.py similarity index 88% rename from endure/lsm/solver/qlsm_solver.py rename to axe/lsm/solver/qlsm_solver.py index 44cfc34..dd74e8d 100644 --- a/endure/lsm/solver/qlsm_solver.py +++ b/axe/lsm/solver/qlsm_solver.py @@ -1,15 +1,16 @@ -from typing import Any, Optional, Callable, Tuple +from typing import Optional, Callable, Tuple import numpy as np import scipy.optimize as SciOpt -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMDesign, Policy, System, LSMBounds +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMDesign, Policy, System, LSMBounds from .util import kl_div_con, get_bounds from .util import H_DEFAULT, T_DEFAULT, LAMBDA_DEFAULT, ETA_DEFAULT, Q_DEFAULT + class QLSMSolver: - def __init__(self, bounds:LSMBounds): + def __init__(self, bounds: LSMBounds): self.bounds = bounds self.cf = EndureCost(bounds.max_considered_levels) @@ -63,7 +64,7 @@ def get_robust_design( minimizer_kwargs: dict = {}, callback_fn: Optional[Callable] = None, ) -> Tuple[LSMDesign, SciOpt.OptimizeResult]: - raise NotImplemented + raise NotImplementedError def get_nominal_design( self, @@ -95,10 +96,7 @@ def get_nominal_design( **default_kwargs ) design = LSMDesign( - h=solution.x[0], - T=solution.x[1], - Q=solution.x[2], - policy=Policy.QFixed) + h=solution.x[0], T=solution.x[1], Q=solution.x[2], policy=Policy.QFixed + ) return design, solution - diff --git a/endure/lsm/solver/util.py b/axe/lsm/solver/util.py similarity index 93% rename from endure/lsm/solver/util.py rename to axe/lsm/solver/util.py index 26bbf95..9200f1c 100644 --- a/endure/lsm/solver/util.py +++ b/axe/lsm/solver/util.py @@ -1,9 +1,9 @@ -from typing import Any, Optional, Callable, Tuple +from typing import Optional, Tuple import numpy as np import scipy.optimize as SciOpt -from endure.lsm.types import Policy, System, LSMBounds +from axe.lsm.types import Policy, System, LSMBounds H_DEFAULT = 3 T_DEFAULT = 3 @@ -48,7 +48,7 @@ def get_bounds( bounds: LSMBounds, policy: Policy = Policy.Leveling, system: Optional[System] = None, - robust: bool = False + robust: bool = False, ) -> SciOpt.Bounds: t_bounds = get_t_bounds(bounds) h_bounds = get_h_bounds(bounds, system) diff --git a/endure/lsm/solver/yzlsm_solver.py b/axe/lsm/solver/yzlsm_solver.py similarity index 95% rename from endure/lsm/solver/yzlsm_solver.py rename to axe/lsm/solver/yzlsm_solver.py index 1763471..91efcdc 100644 --- a/endure/lsm/solver/yzlsm_solver.py +++ b/axe/lsm/solver/yzlsm_solver.py @@ -1,10 +1,10 @@ -from typing import Any, Optional, Callable, Tuple +from typing import Optional, Callable, Tuple import numpy as np import scipy.optimize as SciOpt -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMDesign, Policy, System, LSMBounds +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMDesign, Policy, System, LSMBounds from .util import kl_div_con, get_bounds from .util import ( H_DEFAULT, diff --git a/endure/lsm/types.py b/axe/lsm/types.py similarity index 100% rename from endure/lsm/types.py rename to axe/lsm/types.py diff --git a/endure/ltune/__init__.py b/axe/ltune/__init__.py similarity index 100% rename from endure/ltune/__init__.py rename to axe/ltune/__init__.py diff --git a/endure/ltune/data/__init__.py b/axe/ltune/data/__init__.py similarity index 100% rename from endure/ltune/data/__init__.py rename to axe/ltune/data/__init__.py diff --git a/endure/ltune/data/dataset.py b/axe/ltune/data/dataset.py similarity index 95% rename from endure/ltune/data/dataset.py rename to axe/ltune/data/dataset.py index 1a95790..df58e08 100644 --- a/endure/ltune/data/dataset.py +++ b/axe/ltune/data/dataset.py @@ -5,7 +5,7 @@ import torch import torch.utils.data -from endure.ltune.data.input_features import kINPUT_FEATS +from axe.ltune.data.input_features import kINPUT_FEATS class LTuneDataSet(torch.utils.data.IterableDataset): diff --git a/endure/ltune/data/generator.py b/axe/ltune/data/generator.py similarity index 94% rename from endure/ltune/data/generator.py rename to axe/ltune/data/generator.py index ce8bc02..eeb1eaa 100644 --- a/endure/ltune/data/generator.py +++ b/axe/ltune/data/generator.py @@ -1,9 +1,9 @@ -from typing import List, Tuple, Union +from typing import Union import numpy as np -from endure.lsm.types import LSMBounds, System -from endure.ltune.data.input_features import kSYSTEM_HEADER, kWORKLOAD_HEADER +from axe.lsm.types import LSMBounds, System +from axe.ltune.data.input_features import kSYSTEM_HEADER, kWORKLOAD_HEADER 
class LTuneDataGenerator: diff --git a/endure/ltune/data/input_features.py b/axe/ltune/data/input_features.py similarity index 100% rename from endure/ltune/data/input_features.py rename to axe/ltune/data/input_features.py diff --git a/endure/ltune/loss.py b/axe/ltune/loss.py similarity index 97% rename from endure/ltune/loss.py rename to axe/ltune/loss.py index a998050..3d73688 100644 --- a/endure/ltune/loss.py +++ b/axe/ltune/loss.py @@ -5,8 +5,8 @@ from torch import Tensor import toml -from endure.lcm.model.builder import LearnedCostModelBuilder -from endure.lsm.types import Policy, LSMBounds +from axe.lcm.model.builder import LearnedCostModelBuilder +from axe.lsm.types import Policy, LSMBounds class LearnedCostModelLoss(torch.nn.Module): @@ -21,7 +21,7 @@ def __init__(self, config: dict[str, Any], model_path: str): self.bounds: LSMBounds = LSMBounds(**config["lsm"]["bounds"]) lcm_cfg = toml.load( - os.path.join(config["io"]["data_dir"], model_path, "endure.toml") + os.path.join(config["io"]["data_dir"], model_path, "axe.toml") ) lcm_model = getattr(Policy, lcm_cfg["lsm"]["design"]) lcm_bounds: LSMBounds = LSMBounds(**lcm_cfg["lsm"]["bounds"]) diff --git a/endure/ltune/model/__init__.py b/axe/ltune/model/__init__.py similarity index 100% rename from endure/ltune/model/__init__.py rename to axe/ltune/model/__init__.py diff --git a/endure/ltune/model/builder.py b/axe/ltune/model/builder.py similarity index 90% rename from endure/ltune/model/builder.py rename to axe/ltune/model/builder.py index 6ef5c8b..ad1b485 100644 --- a/endure/ltune/model/builder.py +++ b/axe/ltune/model/builder.py @@ -1,10 +1,10 @@ import torch from typing import Tuple from torch import nn -from endure.lsm.types import Policy +from axe.lsm.types import Policy -from endure.ltune.model import ClassicTuner, QLSMTuner, KapLSMTuner, YZLSMTuner -from endure.ltune.data.input_features import kINPUT_FEATS +from axe.ltune.model import ClassicTuner, QLSMTuner, KapLSMTuner, YZLSMTuner +from axe.ltune.data.input_features import kINPUT_FEATS class LTuneModelBuilder: @@ -62,4 +62,4 @@ def build_model(self, policy: Policy) -> torch.nn.Module: model = model_class(**kwargs) - return model \ No newline at end of file + return model diff --git a/endure/ltune/model/classic_tuner.py b/axe/ltune/model/classic_tuner.py similarity index 100% rename from endure/ltune/model/classic_tuner.py rename to axe/ltune/model/classic_tuner.py diff --git a/endure/ltune/model/kaplsm_tuner.py b/axe/ltune/model/kaplsm_tuner.py similarity index 100% rename from endure/ltune/model/kaplsm_tuner.py rename to axe/ltune/model/kaplsm_tuner.py diff --git a/endure/ltune/model/qlsm_tuner.py b/axe/ltune/model/qlsm_tuner.py similarity index 100% rename from endure/ltune/model/qlsm_tuner.py rename to axe/ltune/model/qlsm_tuner.py diff --git a/endure/ltune/model/yzlsm_tuner.py b/axe/ltune/model/yzlsm_tuner.py similarity index 100% rename from endure/ltune/model/yzlsm_tuner.py rename to axe/ltune/model/yzlsm_tuner.py diff --git a/endure/ltune/util/__init__.py b/axe/ltune/util/__init__.py similarity index 100% rename from endure/ltune/util/__init__.py rename to axe/ltune/util/__init__.py diff --git a/endure/ltune/util/ltune_eval.py b/axe/ltune/util/ltune_eval.py similarity index 96% rename from endure/ltune/util/ltune_eval.py rename to axe/ltune/util/ltune_eval.py index bc88f1c..592d3fa 100644 --- a/endure/ltune/util/ltune_eval.py +++ b/axe/ltune/util/ltune_eval.py @@ -4,12 +4,12 @@ import scipy.optimize as SciOpt import torch -from endure.lcm.util import eval_lcm_impl 
-from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMBounds, LSMDesign, System, Policy -from endure.ltune.data.generator import LTuneDataGenerator -from endure.ltune.loss import LearnedCostModelLoss -import endure.lsm.solver as Solver +from axe.lcm.util import eval_lcm_impl +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMBounds, LSMDesign, System, Policy +from axe.ltune.data.generator import LTuneDataGenerator +from axe.ltune.loss import LearnedCostModelLoss +import axe.lsm.solver as Solver class LTuneEvalUtil: @@ -230,4 +230,4 @@ def gen_sample_eval(self, system: Optional[System] = None): row['stune_K'] = stune_design.K row['ltune_K'] = ltune_design.K - return row \ No newline at end of file + return row diff --git a/endure/util/__init__.py b/axe/util/__init__.py similarity index 100% rename from endure/util/__init__.py rename to axe/util/__init__.py diff --git a/endure/util/losses.py b/axe/util/losses.py similarity index 100% rename from endure/util/losses.py rename to axe/util/losses.py diff --git a/endure/util/lr_scheduler.py b/axe/util/lr_scheduler.py similarity index 100% rename from endure/util/lr_scheduler.py rename to axe/util/lr_scheduler.py diff --git a/endure/util/optimizer.py b/axe/util/optimizer.py similarity index 100% rename from endure/util/optimizer.py rename to axe/util/optimizer.py diff --git a/endure/util/trainer.py b/axe/util/trainer.py similarity index 100% rename from endure/util/trainer.py rename to axe/util/trainer.py diff --git a/docker-compose.yaml b/docker-compose.yaml deleted file mode 100644 index c9465ad..0000000 --- a/docker-compose.yaml +++ /dev/null @@ -1,17 +0,0 @@ ---- -version: "1" -services: - endure: - image: ndhuynh/endure - container_name: endure - user: "${UID}:${GID}" - environment: - - TZ=America/New_York - tty: true - stdin_open: true - - volumes: - - /scratchNVM1/ndhuynh/data:/data - - /scratchNVM0/ndhuynh/endure-torch:/endure - ports: - - "8888:8888" diff --git a/experiments/__init__.py b/experiments/__init__.py deleted file mode 100644 index ab0b31e..0000000 --- a/experiments/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .exp01 import Exp01 -from .cost_surface_exp import CostSurfaceExp diff --git a/experiments/cost_surface_exp.py b/experiments/cost_surface_exp.py deleted file mode 100644 index a6a093d..0000000 --- a/experiments/cost_surface_exp.py +++ /dev/null @@ -1,157 +0,0 @@ -# import logging -# import pandas as pd -# import numpy as np -# from itertools import combinations_with_replacement -# from data.dataio import Writer -# from lsm.lsmtype import LSMSystem -# import lsm.cost as CostFunc -# -# MAX_LEVELS = 16 -# -# -# class CostSurfaceExp: -# def __init__(self, config: dict) -> None: -# self.config = config -# self.log = logging.getLogger("endure") -# self.writer = Writer(self.config) -# -# def wl_to_array(self, wl_dict): -# return (wl_dict["id"], wl_dict["z0"], wl_dict["z1"], wl_dict["q"], wl_dict["w"]) -# -# def calc_qcosts(self): -# system_vars = LSMSystem(**self.config["system"]) -# q_cf = CostFunc.EndureQFixedCost(**self.config["system"]) -# -# df = [] -# for wl in self.config["inputs"]["workloads"]: -# wl_id, z0, z1, q, w = self.wl_to_array(wl) -# self.log.info(f"[QFixedCost] Workload: ({z0}, {z1}, {q}, {w})") -# for h in np.arange(0, system_vars.H, 0.5): -# for T in np.arange(2, 100, 4): -# tier_cost = q_cf.calc_cost(h, T, T - 1, z0, z1, q, w) -# level_cost = q_cf.calc_cost(h, T, 1, z0, z1, q, w) -# for Q in range(1, T): -# q_cost = q_cf.calc_cost(h, T, Q, z0, z1, q, w) -# row = { -# 
"wl_id": wl_id, -# "h": h, -# "T": T, -# "z0": z0, -# "z1": z1, -# "q": q, -# "w": w, -# "Q": Q, -# "new_cost": q_cost, -# "tier_cost": tier_cost, -# "level_cost": level_cost, -# "old_cost": min([tier_cost, level_cost]), -# } -# df.append(row) -# -# return pd.DataFrame(df) -# -# def calc_yzcost(self): -# system_vars = LSMSystem(**self.config["system"]) -# cf = CostFunc.EndureYZHybridCost(**self.config["system"]) -# -# df = [] -# for wl in self.config["inputs"]["workloads"]: -# wl_id, z0, z1, q, w = self.wl_to_array(wl) -# self.log.info(f"[YZCost] Workload: ({z0}, {z1}, {q}, {w})") -# for h in np.arange(0, system_vars.H, 0.25): -# for T in np.arange(2, 50): -# tier_cost = cf.calc_cost(h, T, T - 1, T - 1, z0, z1, q, w) -# level_cost = cf.calc_cost(h, T, 1, 1, z0, z1, q, w) -# for Y in range(1, T): -# for Z in range(0, Y): -# Z += 1 -# new_cost = cf.calc_cost(h, T, Y, Z, z0, z1, q, w) -# row = { -# "wl_id": wl_id, -# "h": h, -# "T": T, -# "z0": z0, -# "z1": z1, -# "q": q, -# "w": w, -# "Y": Y, -# "Z": Z, -# "new_cost": new_cost, -# "tier_cost": tier_cost, -# "level_cost": level_cost, -# "old_cost": min([tier_cost, level_cost]), -# } -# df.append(row) -# -# return pd.DataFrame(df) -# -# def calc_kcost(self): -# system_vars = LSMSystem(**self.config["system"]) -# cf = CostFunc.EndureKHybridCost(**self.config["system"]) -# -# for wl in self.config["inputs"]["workloads"]: -# wl_id, z0, z1, q, w = self.wl_to_array(wl) -# self.log.info(f"[KCost] Workload: ({z0}, {z1}, {q}, {w})") -# df = [] -# for h in np.arange(0, system_vars.H, 0.5): -# for T in np.arange(2, 50, 2): -# levels = int(cf.L(h, T, True)) -# level_assignments = self.create_k_levels(levels, T - 1) -# tiering = np.array([T - 1] * levels) -# leveling = np.array([1] * levels) -# tier_cost = cf.calc_cost(h, T, tiering, z0, z1, q, w) -# level_cost = cf.calc_cost(h, T, leveling, z0, z1, q, w) -# for K in level_assignments: -# K = np.pad(K, (0, MAX_LEVELS - len(K))) -# new_cost = cf.calc_cost(h, T, K, z0, z1, q, w) -# row = { -# "wl_id": wl_id, -# "h": h, -# "T": T, -# "z0": z0, -# "z1": z1, -# "q": q, -# "w": w, -# "B": self.config["system"]["B"], -# "phi": self.config["system"]["phi"], -# "s": self.config["system"]["s"], -# "E": self.config["system"]["E"], -# "H": self.config["system"]["H"], -# "N": self.config["system"]["N"], -# "new_cost": new_cost, -# "tier_cost": tier_cost, -# "level_cost": level_cost, -# "old_cost": min([tier_cost, level_cost]), -# } -# for level_idx in range(MAX_LEVELS): -# row[f"K_{level_idx}"] = K[level_idx] -# df.append(row) -# df = pd.DataFrame(df) -# self.log.info(f"Writing workload ID {wl_id}") -# df.to_feather(f"k_wl_{wl_id}.feather") -# -# return pd.DataFrame(df) -# -# def create_k_levels(self, levels: int, max_size_ratio: int): -# arr = combinations_with_replacement(range(max_size_ratio, 0, -1), levels) -# -# return arr -# -# def run(self) -> None: -# self.log.info("Cost Surface Experiment") -# -# # arr = self.create_k_levels(5, 10) -# # for e in arr: -# # self.log.info(e) -# -# df = self.calc_kcost() -# df.to_feather("cost_surface_k.feather") -# # self.writer.export_csv_file(df, 'cost_surface_k_cost.csv') -# -# # df = self.calc_qcosts() -# # self.writer.export_csv_file(df, 'cost_surface_q_cost.csv') -# -# # df = self.calc_yzcost() -# # self.writer.export_csv_file(df, 'cost_surface_yz_cost.csv') -# -# return None diff --git a/experiments/exp01.py b/experiments/exp01.py deleted file mode 100644 index e405e6a..0000000 --- a/experiments/exp01.py +++ /dev/null @@ -1,68 +0,0 @@ -# import logging -# import pandas as 
pd -# from data.dataio import Writer -# from lsm.lsmtype import LSMTree, LSMSystem -# from solver.nominalk import NominalQFixedTuning -# from solver.nominal import NominalTierLevelTuning -# -# -# class Exp01: -# def __init__(self, config: dict) -> None: -# self.config = config -# self.log = logging.getLogger("endure") -# -# def get_endurek_trees(self) -> list[dict]: -# self.log.info("Calculating EndureK LSM Tree Tunings...") -# -# system = LSMSystem(**self.config["system"]) -# default_tree = LSMTree(system) -# -# problem = NominalQFixedTuning(system) -# trees = [] -# for id in self.config["exp_config"]["Exp01"]["wl_ids"]: -# wl = self.config["inputs"]["workloads"][id] -# self.log.debug( -# f'Workload: {wl["id"]}: {{z0: {wl["z0"]}, z1: {wl["z1"]}, q: {wl["q"]}, w: {wl["w"]}}}' -# ) -# tree = problem.get_nominal_design(default_tree, wl) -# tree = tree.as_dict() -# tree.update(wl) -# trees.append(tree) -# self.log.debug( -# f'EndureK: h={tree["h"]:.2f}, T={tree["T"]:.2f}, Q={tree["Q"]:.2f}' -# ) -# -# return trees -# -# def get_endure2_trees(self) -> list[dict]: -# self.log.info("Calculating EndureK LSM Tree Tunings...") -# -# system = LSMSystem(**self.config["system"]) -# default_tree = LSMTree(system) -# -# problem = NominalTierLevelTuning(system) -# trees = [] -# for id in self.config["exp_config"]["Exp01"]["wl_ids"]: -# wl = self.config["inputs"]["workloads"][id] -# self.log.debug( -# f'Workload: {wl["id"]}: {{z0: {wl["z0"]}, z1: {wl["z1"]}, q: {wl["q"]}, w: {wl["w"]}}}' -# ) -# tree = problem.get_nominal_design(default_tree, wl) -# tree = tree.as_dict() -# tree.update(wl) -# trees.append(tree) -# self.log.debug( -# f'EndureK: h={tree["h"]:.2f}, T={tree["T"]:.2f}, policy={tree["policy"]}' -# ) -# -# return trees -# -# def run(self) -> None: -# self.log.info("Experiment 01") -# writer = Writer(self.config) -# -# endurek_trees = pd.DataFrame(self.get_endurek_trees()) -# endure2_trees = pd.DataFrame(self.get_endure2_trees()) -# -# writer.export_csv_file(endurek_trees, "exp01_endurek_trees.csv") -# writer.export_csv_file(endure2_trees, "exp01_endure2_trees.csv") diff --git a/jobs/botorch_bo.py b/jobs/botorch_bo.py index c20dc8f..a9e64cf 100644 --- a/jobs/botorch_bo.py +++ b/jobs/botorch_bo.py @@ -1,531 +1,531 @@ -import torch -import numpy as np -from typing import List, Optional, Tuple -import logging -import os -import time -from itertools import product - -from botorch.models import MixedSingleTaskGP -from botorch.fit import fit_gpytorch_model -from gpytorch.mlls import ExactMarginalLogLikelihood -from botorch.acquisition import ExpectedImprovement, UpperConfidenceBound -from botorch.acquisition.monte_carlo import qExpectedImprovement -from botorch.optim import optimize_acqf_mixed -from botorch.models.transforms import Normalize, Standardize -from torch.types import Number - -from endure.data.io import Reader -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMDesign, System, Policy, Workload, LSMBounds -import endure.lcm.data.generator as Gen -import endure.lsm.solver as Solver -from jobs.infra.db_log import ( - initialize_database, - log_new_run, - log_design_cost, - log_run_details, -) - - -def print_best_designs(best_designs: List[Tuple[LSMDesign, float]]) -> None: - sorted_designs = sorted(best_designs, key=lambda x: x[1]) - print("Best Design Found:") - for design, cost in sorted_designs[:1]: - if design.policy == Policy.KHybrid: - k_values_str = ", ".join(str(k) for k in design.K) - print( - f"Design: h={design.h}, T={design.T}, " - f"Policy={design.policy}, 
K=[{k_values_str}], " - f"Cost={cost}" - ) - else: - print( - f"Design: h={design.h}, T={design.T}, " - f"Policy={design.policy}, Q={design.Q}, Y={design.Y}," - f" Z={design.Z}, Cost={cost}" - ) - with open("best_designs.txt", "w") as file: - file.write("All Best Designs Found:\n") - for design, cost in best_designs: - file.write( - f"Design: h={design.h}, T={design.T}, " - f"Policy={design.policy}, Q={design.Q}, " - f"Y={design.Y}, Z={design.Z}, Cost={cost}\n" - ) - - -class BayesianPipeline: - def __init__(self, config: dict) -> None: - self.end_time: float = 0 - self.start_time: float = 0 - self.run_id: int = 0 - self.conn = None - self.log: logging.Logger = logging.getLogger(config["log"]["name"]) - - jconfig: dict = config["job"]["BayesianOptimization"] - self.bounds: LSMBounds = LSMBounds(**config["lsm"]["bounds"]) - self.cf: EndureCost = EndureCost(self.bounds.max_considered_levels) - - self.system: System = System(**config["lsm"]["system"]) - self.workload: Workload = Workload(**config["lsm"]["workload"]) - self.initial_samples: int = jconfig["initial_samples"] - self.acquisition_function: str = jconfig["acquisition_function"] - self.num_restarts: int = jconfig["num_restarts"] - self.num_iterations: int = jconfig["num_iterations"] - self.output_dir = os.path.join( - jconfig["database"]["data_dir"], - jconfig["database"]["db_path"], - ) - self.db_path = os.path.join(self.output_dir, jconfig["database"]["db_name"]) - self.model_type = getattr(Policy, config["lsm"]["design"]) - self.num_k_values = jconfig["num_k_values"] - - self.config: dict = config - self.jconfig: dict = jconfig - - def run( - self, - system: Optional[System] = None, - workload: Optional[Workload] = None, - num_iterations: Optional[int] = None, - sample_size: Optional[int] = None, - acqf: Optional[str] = None, - ) -> Tuple[LSMDesign, float]: - self.start_time = time.time() - self.initialize_environment(system, workload, num_iterations, sample_size, acqf) - train_x, train_y, best_y = self._generate_initial_data(self.initial_samples) - best_designs = self.optimization_loop(train_x, train_y, best_y) - best_design, best_cost, _ = self.finalize_optimization(best_designs) - - return best_design, best_cost - - def initialize_environment( - self, - system: Optional[System], - workload: Optional[Workload], - num_iterations: Optional[int], - sample_size: Optional[int], - acqf: Optional[str], - ) -> None: - os.makedirs(self.output_dir, exist_ok=True) - self.conn = initialize_database(self.db_path) - self.system = system if system is not None else self.system - self.initial_samples = ( - sample_size if sample_size is not None else self.initial_samples - ) - self.workload = workload if workload is not None else self.workload - self.acquisition_function = ( - acqf if acqf is not None else self.acquisition_function - ) - self.num_iterations = ( - num_iterations if num_iterations is not None else self.num_iterations - ) - assert self.conn is not None - self.run_id = log_new_run( - self.conn, - self.system, - self.workload, - self.num_iterations, - self.initial_samples, - self.acquisition_function, - ) - - def generate_initial_bounds(self, system: System) -> torch.Tensor: - h_bounds = torch.tensor( - [ - self.bounds.bits_per_elem_range[0], - max(np.floor(system.H), self.bounds.bits_per_elem_range[1]), - ], - dtype=torch.float, - ) - - t_bounds = torch.tensor(self.bounds.size_ratio_range) - policy_bounds = torch.tensor([0, 1]) - if self.model_type == Policy.QFixed: - q_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1]) 
- bounds = torch.stack([h_bounds, t_bounds, q_bounds], dim=-1) - elif self.model_type == Policy.YZHybrid: - y_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1]) - z_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1]) - bounds = torch.stack([h_bounds, t_bounds, y_bounds, z_bounds], dim=-1) - elif self.model_type == Policy.KHybrid: - lower_limits = [ - self.bounds.bits_per_elem_range[0], - self.bounds.size_ratio_range[0], - ] + [1] * self.num_k_values - upper_limits = [ - max(np.floor(system.H), self.bounds.bits_per_elem_range[1]), - self.bounds.size_ratio_range[1], - ] + [self.bounds.size_ratio_range[1] - 1] * self.num_k_values - new_bounds_list = [lower_limits, upper_limits] - bounds = torch.tensor(new_bounds_list, dtype=torch.float64) - else: - bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1) - - return bounds - - def optimization_loop( - self, - train_x: torch.Tensor, - train_y: torch.Tensor, - best_y: Number, - ) -> list[tuple[LSMDesign, Number]]: - bounds = self.generate_initial_bounds(self.system) - fixed_feature_list = self._initialize_feature_list(bounds) - best_designs = [] - self.log.debug(f"{best_y=}") - - epochs = self.num_iterations - for i in range(epochs): - new_candidates = self.get_next_points( - train_x, - train_y, - best_y, - bounds, - fixed_feature_list, - self.acquisition_function, - 1, - ) - self.log.debug(f"[it {i + 1}/{epochs}] {new_candidates=}") - _, costs = self.evaluate_new_candidates(new_candidates) - train_x, train_y, best_y, best_designs = self.update_training_data( - train_x, train_y, new_candidates, costs, best_designs - ) - self.log.debug(f"[it {i + 1}/{epochs}] {costs=}") - self.log.debug("Bayesian Optimization completed") - - return best_designs - - def _initialize_feature_list(self, bounds: torch.Tensor) -> List: - t_bounds = bounds[:, 1] - lower_t_bound = int(np.floor(t_bounds[0].item())) - upper_t_bound = int(np.ceil(t_bounds[1].item())) - fixed_features_list = [] - if self.model_type == Policy.Classic: - for size_ratio in range(lower_t_bound, upper_t_bound + 1): - for pol in range(2): - fixed_features_list.append({1: size_ratio, 2: pol}) - elif self.model_type == Policy.QFixed: - for size_ratio in range(lower_t_bound, upper_t_bound + 1): - for q in range(1, size_ratio - 1): - fixed_features_list.append({1: size_ratio, 2: q}) - elif self.model_type == Policy.YZHybrid: - for size_ratio in range(lower_t_bound, upper_t_bound + 1): - for y in range(1, size_ratio - 1): - for z in range(1, size_ratio - 1): - fixed_features_list.append({1: size_ratio, 2: y, 3: z}) - elif self.model_type == Policy.KHybrid: - for t in range(2, upper_t_bound + 1): - param_values = [range(1, upper_t_bound)] * self.num_k_values - for combination in product(*param_values): - fixed_feature = {1: t} - fixed_feature.update( - {i + 2: combination[i] for i in range(len(combination))} - ) - fixed_features_list.append(fixed_feature) - - return fixed_features_list - - def evaluate_new_candidates( - self, new_candidates: torch.Tensor - ) -> Tuple[List[LSMDesign], List[float]]: - new_designs = self.create_designs_from_candidates(new_candidates) - - costs = [ - self.cf.calc_cost( - design, - self.system, - self.workload.z0, - self.workload.z1, - self.workload.q, - self.workload.w, - ) - for design in new_designs - ] - assert self.conn is not None - for design, cost in zip(new_designs, costs): - log_design_cost(self.conn, self.run_id, design, cost) - - return new_designs, costs - - def update_training_data( - self, train_x, train_y, new_candidates, 
costs, best_designs - ) -> Tuple[torch.Tensor, torch.Tensor, Number, List[Tuple[LSMDesign, Number]]]: - new_target = torch.tensor(costs).unsqueeze(-1) - train_x = torch.cat([train_x, new_candidates]) - train_y = torch.cat([train_y, new_target]) - best_y = train_y.min().item() - best_designs = self._update_best_designs( - best_designs, new_candidates, new_target - ) - - return train_x, train_y, best_y, best_designs - - def create_designs_from_candidates( - self, candidates: torch.Tensor - ) -> List[LSMDesign]: - new_designs = [] - for candidate in candidates: - new_designs += self._generate_new_designs_helper(candidate) - - return new_designs - - def _generate_new_designs_helper(self, candidate: torch.Tensor) -> List[LSMDesign]: - new_designs = [] - h = candidate[0].item() - if h == self.system.H: - h = h - 0.01 - if self.model_type == Policy.QFixed: - size_ratio, q_val = candidate[1].item(), candidate[2].item() - policy = Policy.QFixed - new_designs = [ - LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, Q=int(q_val)) - ] - # Uncomment the following lines of code if you want the q value to be the same - # through all levels and behave like KLSM - # policy = Policy.KHybrid - k_values = [q_val for _ in range(1, self.bounds.max_considered_levels)] - new_designs = [ - LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, K=k_values) - ] - elif self.model_type == Policy.YZHybrid: - size_ratio, y_val, z_val = ( - candidate[1].item(), - candidate[2].item(), - candidate[3].item(), - ) - policy = Policy.YZHybrid - new_designs = [ - LSMDesign( - h=h, - T=np.ceil(size_ratio), - policy=policy, - Y=int(y_val), - Z=int(z_val), - ) - ] - elif self.model_type == Policy.KHybrid: - size_ratio = candidate[1].item() - k_values = [cand.item() for cand in candidate[2:]] - policy = Policy.KHybrid - if len(k_values) < self.bounds.max_considered_levels: - k_values += [1] * (self.bounds.max_considered_levels - len(k_values)) - new_designs.append( - LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, K=k_values) - ) - else: - size_ratio, policy_val = candidate[1].item(), candidate[2].item() - policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering - new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)] - - return new_designs - - def finalize_optimization(self, best_designs): - elapsed_time = time.time() - self.start_time - sorted_designs = sorted(best_designs, key=lambda x: x[1]) - analaytical_design, analytical_cost = self._find_analytical_results( - self.system, self.workload - ) - best_design, best_cost = sorted_designs[0][0], sorted_designs[0][1] - assert self.conn is not None - log_run_details( - self.conn, - self.run_id, - elapsed_time, - analytical_cost, - best_cost, - analaytical_design, - best_design, - ) - self.conn.close() - - return best_design, best_cost, elapsed_time - - def get_next_points( - self, - x: torch.Tensor, - y: torch.Tensor, - best_y: float, - bounds: torch.Tensor, - fixed_features_list: List, - acquisition_function: str = "ExpectedImprovement", - n_points: int = 1, - ) -> torch.Tensor: - if self.model_type == Policy.QFixed or self.model_type == Policy.Classic: - single_model = MixedSingleTaskGP( - x, - y, - cat_dims=[1, 2], - input_transform=Normalize(d=x.shape[1], bounds=bounds), - outcome_transform=Standardize(m=1), - ) - elif self.model_type == Policy.YZHybrid: - single_model = MixedSingleTaskGP( - x, - y, - cat_dims=[1, 2, 3], - input_transform=Normalize(d=x.shape[1], bounds=bounds), - outcome_transform=Standardize(m=1), - ) - elif self.model_type == Policy.KHybrid: - # 
the self.num_k_values represents the number of categorical values - # the model is predicting out of the self.max_levels. The +2 is - # because this is the list of indices and the first 2 indices - # represent the 'h' value and then the 'T'value. So everything from - # index 1 till the size of num_k_values + 2 is a categorical value - cat_dims = list(range(1, self.num_k_values + 2)) - single_model = MixedSingleTaskGP( - x, - y, - cat_dims=cat_dims, - input_transform=Normalize(d=x.shape[1], bounds=bounds), - outcome_transform=Standardize(m=1), - ) - else: - raise ValueError(f"Unsupported model type: {self.model_type}") - mll = ExactMarginalLogLikelihood(single_model.likelihood, single_model) - fit_gpytorch_model(mll) - if acquisition_function == "ExpectedImprovement": - acqf = ExpectedImprovement( - model=single_model, best_f=best_y, maximize=False - ) - elif acquisition_function == "UpperConfidenceBound": - beta = self.jconfig["beta_value"] - acqf = UpperConfidenceBound(model=single_model, beta=beta, maximize=False) - elif acquisition_function == "qExpectedImprovement": - acqf = qExpectedImprovement(model=single_model, best_f=-best_y) - else: - raise ValueError(f"Unknown acquisition function: {acquisition_function}") - - candidates, _ = optimize_acqf_mixed( - acq_function=acqf, - bounds=bounds, - q=n_points, - num_restarts=self.num_restarts, - raw_samples=self.jconfig["raw_samples"], - fixed_features_list=fixed_features_list, - ) - return candidates - - def _generate_initial_data( - self, n: int = 30 - ) -> Tuple[torch.Tensor, torch.Tensor, Number]: - train_x = [] - train_y = [] - - generator_class = Gen.get_generator(self.model_type) - generator = generator_class(self.bounds) - - for _ in range(n): - design = generator._sample_design(self.system) - x_vals = np.array([design.h, design.T]) - if self.model_type == Policy.Classic: - policy = 0 if design.policy == Policy.Tiering else 1 - x_vals = np.concatenate((x_vals, [policy])) - elif self.model_type == Policy.QFixed: - x_vals = np.concatenate((x_vals, [design.Q])) - elif self.model_type == Policy.YZHybrid: - x_vals = np.array((x_vals, [design.Y, design.Z])) - elif self.model_type == Policy.KHybrid: - k_values_padded = design.K + [1] * self.num_k_values - k_values_padded = k_values_padded[: self.num_k_values] - x_vals = np.concatenate((x_vals, k_values_padded)) - cost = self.cf.calc_cost( - design, - self.system, - self.workload.z0, - self.workload.z1, - self.workload.q, - self.workload.w, - ) - assert self.conn is not None - log_design_cost(self.conn, self.run_id, design, cost) - train_x.append(x_vals) - train_y.append(cost) - - train_x = np.array(train_x) - train_x = torch.tensor(train_x) - train_y = torch.tensor(train_y, dtype=torch.float64).unsqueeze(-1) - best_y = train_y.min().item() - - return train_x, train_y, best_y - - def _update_best_designs( - self, - best_designs: List[Tuple[LSMDesign, float]], - new_x: torch.Tensor, - new_y: torch.Tensor, - ) -> List[Tuple[LSMDesign, float]]: - for x, y in zip(new_x, new_y): - kwargs = { - "h": x[0].item(), - "T": np.ceil(x[1].item()), - "policy": self.model_type, - } - if self.model_type == Policy.QFixed: - kwargs["Q"] = x[2].item() - elif self.model_type == Policy.YZHybrid: - kwargs["Y"] = x[2].item() - kwargs["Z"] = x[3].item() - elif self.model_type == Policy.KHybrid: - kwargs["K"] = x[2:].tolist() - else: # self.model_type == Policy.Classic - pol = Policy.Leveling if x[2].item() < 0.5 else Policy.Tiering - kwargs["policy"] = pol - best_designs.append((LSMDesign(**kwargs), y.item())) - - 
return best_designs
-
-    def _find_analytical_results(
-        self, system: System, workload: Workload, bounds: Optional[LSMBounds] = None
-    ) -> Tuple[LSMDesign, float]:
-        bounds = bounds if bounds is not None else self.bounds
-        if self.model_type == Policy.Classic:
-            solver = Solver.ClassicSolver(bounds)
-        elif self.model_type == Policy.QFixed:
-            solver = Solver.QLSMSolver(bounds)
-        elif self.model_type == Policy.YZHybrid:
-            solver = Solver.YZLSMSolver(bounds)
-        elif self.model_type == Policy.KHybrid:
-            solver = Solver.KLSMSolver(bounds)
-        else:
-            raise KeyError(f"Solver for {self.model_type} not implemented")
-
-        z0, z1, q, w = workload.z0, workload.z1, workload.q, workload.w
-        opt_design, _ = solver.get_nominal_design(system, z0, z1, q, w)
-
-        if self.model_type == Policy.Classic:
-            x = np.array([opt_design.h, opt_design.T])
-            policy = opt_design.policy
-            assert isinstance(solver, Solver.ClassicSolver)
-            cost = solver.nominal_objective(x, policy, system, z0, z1, q, w)
-        elif self.model_type == Policy.QFixed:
-            x = np.array([opt_design.h, opt_design.T, opt_design.Q])
-            assert isinstance(solver, Solver.QLSMSolver)
-            cost = solver.nominal_objective(x, system, z0, z1, q, w)
-        elif self.model_type == Policy.YZHybrid:
-            x = np.array([opt_design.h, opt_design.T, opt_design.Y, opt_design.Z])
-            assert isinstance(solver, Solver.YZLSMSolver)
-            cost = solver.nominal_objective(x, system, z0, z1, q, w)
-        elif self.model_type == Policy.KHybrid:
-            x = np.array([opt_design.h, opt_design.T] + opt_design.K)
-            assert isinstance(solver, Solver.KLSMSolver)
-            cost = solver.nominal_objective(x, system, z0, z1, q, w)
-        else:
-            raise KeyError(f"Unknown model type {self.model_type}")
-
-        print("Cost for the nominal design using analytical solver: ", cost)
-        print("Nominal Design suggested by analytical solver: ", opt_design)
-
-        return opt_design, cost
-
-
-if __name__ == "__main__":
-    config = Reader.read_config("endure.toml")
-
-    log = logging.getLogger(config["log"]["name"])
-    log.info("Initializing Bayesian Optimization Job")
-
-    bayesian_optimizer = BayesianPipeline(config)
-    bayesian_optimizer.run()
+# import torch
+# import numpy as np
+# from typing import List, Optional, Tuple
+# import logging
+# import os
+# import time
+# from itertools import product
+#
+# from botorch.models import MixedSingleTaskGP
+# from botorch.fit import fit_gpytorch_model
+# from gpytorch.mlls import ExactMarginalLogLikelihood
+# from botorch.acquisition import ExpectedImprovement, UpperConfidenceBound
+# from botorch.acquisition.monte_carlo import qExpectedImprovement
+# from botorch.optim import optimize_acqf_mixed
+# from botorch.models.transforms import Normalize, Standardize
+# from torch.types import Number
+#
+# from axe.data.io import Reader
+# from axe.lsm.cost import EndureCost
+# from axe.lsm.types import LSMDesign, System, Policy, Workload, LSMBounds
+# import axe.lcm.data.generator as Gen
+# import axe.lsm.solver as Solver
+# from jobs.infra.db_log import (
+#     initialize_database,
+#     log_new_run,
+#     log_design_cost,
+#     log_run_details,
+# )
+#
+#
+# def print_best_designs(best_designs: List[Tuple[LSMDesign, float]]) -> None:
+#     sorted_designs = sorted(best_designs, key=lambda x: x[1])
+#     print("Best Design Found:")
+#     for design, cost in sorted_designs[:1]:
+#         if design.policy == Policy.KHybrid:
+#             k_values_str = ", ".join(str(k) for k in design.K)
+#             print(
+#                 f"Design: h={design.h}, T={design.T}, "
+#                 f"Policy={design.policy}, K=[{k_values_str}], "
+#                 f"Cost={cost}"
+#             )
+#         else:
+#             print(
+#                 f"Design: h={design.h}, T={design.T}, "
+#                 f"Policy={design.policy}, Q={design.Q}, Y={design.Y},"
+#                 f" Z={design.Z}, Cost={cost}"
+#             )
+#     with open("best_designs.txt", "w") as file:
+#         file.write("All Best Designs Found:\n")
+#         for design, cost in best_designs:
+#             file.write(
+#                 f"Design: h={design.h}, T={design.T}, "
+#                 f"Policy={design.policy}, Q={design.Q}, "
+#                 f"Y={design.Y}, Z={design.Z}, Cost={cost}\n"
+#             )
+#
+#
+# class BayesianPipeline:
+#     def __init__(self, config: dict) -> None:
+#         self.end_time: float = 0
+#         self.start_time: float = 0
+#         self.run_id: int = 0
+#         self.conn = None
+#         self.log: logging.Logger = logging.getLogger(config["log"]["name"])
+#
+#         jconfig: dict = config["job"]["BayesianOptimization"]
+#         self.bounds: LSMBounds = LSMBounds(**config["lsm"]["bounds"])
+#         self.cf: EndureCost = EndureCost(self.bounds.max_considered_levels)
+#
+#         self.system: System = System(**config["lsm"]["system"])
+#         self.workload: Workload = Workload(**config["lsm"]["workload"])
+#         self.initial_samples: int = jconfig["initial_samples"]
+#         self.acquisition_function: str = jconfig["acquisition_function"]
+#         self.num_restarts: int = jconfig["num_restarts"]
+#         self.num_iterations: int = jconfig["num_iterations"]
+#         self.output_dir = os.path.join(
+#             jconfig["database"]["data_dir"],
+#             jconfig["database"]["db_path"],
+#         )
+#         self.db_path = os.path.join(self.output_dir, jconfig["database"]["db_name"])
+#         self.model_type = getattr(Policy, config["lsm"]["design"])
+#         self.num_k_values = jconfig["num_k_values"]
+#
+#         self.config: dict = config
+#         self.jconfig: dict = jconfig
+#
+#     def run(
+#         self,
+#         system: Optional[System] = None,
+#         workload: Optional[Workload] = None,
+#         num_iterations: Optional[int] = None,
+#         sample_size: Optional[int] = None,
+#         acqf: Optional[str] = None,
+#     ) -> Tuple[LSMDesign, float]:
+#         self.start_time = time.time()
+#         self.initialize_environment(system, workload, num_iterations, sample_size, acqf)
+#         train_x, train_y, best_y = self._generate_initial_data(self.initial_samples)
+#         best_designs = self.optimization_loop(train_x, train_y, best_y)
+#         best_design, best_cost, _ = self.finalize_optimization(best_designs)
+#
+#         return best_design, best_cost
+#
+#     def initialize_environment(
+#         self,
+#         system: Optional[System],
+#         workload: Optional[Workload],
+#         num_iterations: Optional[int],
+#         sample_size: Optional[int],
+#         acqf: Optional[str],
+#     ) -> None:
+#         os.makedirs(self.output_dir, exist_ok=True)
+#         self.conn = initialize_database(self.db_path)
+#         self.system = system if system is not None else self.system
+#         self.initial_samples = (
+#             sample_size if sample_size is not None else self.initial_samples
+#         )
+#         self.workload = workload if workload is not None else self.workload
+#         self.acquisition_function = (
+#             acqf if acqf is not None else self.acquisition_function
+#         )
+#         self.num_iterations = (
+#             num_iterations if num_iterations is not None else self.num_iterations
+#         )
+#         assert self.conn is not None
+#         self.run_id = log_new_run(
+#             self.conn,
+#             self.system,
+#             self.workload,
+#             self.num_iterations,
+#             self.initial_samples,
+#             self.acquisition_function,
+#         )
+#
+#     def generate_initial_bounds(self, system: System) -> torch.Tensor:
+#         h_bounds = torch.tensor(
+#             [
+#                 self.bounds.bits_per_elem_range[0],
+#                 max(np.floor(system.H), self.bounds.bits_per_elem_range[1]),
+#             ],
+#             dtype=torch.float,
+#         )
+#
+#         t_bounds = torch.tensor(self.bounds.size_ratio_range)
+#         policy_bounds = torch.tensor([0, 1])
+#         if self.model_type == Policy.QFixed:
+#             q_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1])
+#             bounds = torch.stack([h_bounds, t_bounds, q_bounds], dim=-1)
+#         elif self.model_type == Policy.YZHybrid:
+#             y_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1])
+#             z_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1])
+#             bounds = torch.stack([h_bounds, t_bounds, y_bounds, z_bounds], dim=-1)
+#         elif self.model_type == Policy.KHybrid:
+#             lower_limits = [
+#                 self.bounds.bits_per_elem_range[0],
+#                 self.bounds.size_ratio_range[0],
+#             ] + [1] * self.num_k_values
+#             upper_limits = [
+#                 max(np.floor(system.H), self.bounds.bits_per_elem_range[1]),
+#                 self.bounds.size_ratio_range[1],
+#             ] + [self.bounds.size_ratio_range[1] - 1] * self.num_k_values
+#             new_bounds_list = [lower_limits, upper_limits]
+#             bounds = torch.tensor(new_bounds_list, dtype=torch.float64)
+#         else:
+#             bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1)
+#
+#         return bounds
+#
+#     def optimization_loop(
+#         self,
+#         train_x: torch.Tensor,
+#         train_y: torch.Tensor,
+#         best_y: Number,
+#     ) -> list[tuple[LSMDesign, Number]]:
+#         bounds = self.generate_initial_bounds(self.system)
+#         fixed_feature_list = self._initialize_feature_list(bounds)
+#         best_designs = []
+#         self.log.debug(f"{best_y=}")
+#
+#         epochs = self.num_iterations
+#         for i in range(epochs):
+#             new_candidates = self.get_next_points(
+#                 train_x,
+#                 train_y,
+#                 best_y,
+#                 bounds,
+#                 fixed_feature_list,
+#                 self.acquisition_function,
+#                 1,
+#             )
+#             self.log.debug(f"[it {i + 1}/{epochs}] {new_candidates=}")
+#             _, costs = self.evaluate_new_candidates(new_candidates)
+#             train_x, train_y, best_y, best_designs = self.update_training_data(
+#                 train_x, train_y, new_candidates, costs, best_designs
+#             )
+#             self.log.debug(f"[it {i + 1}/{epochs}] {costs=}")
+#         self.log.debug("Bayesian Optimization completed")
+#
+#         return best_designs
+#
+#     def _initialize_feature_list(self, bounds: torch.Tensor) -> List:
+#         t_bounds = bounds[:, 1]
+#         lower_t_bound = int(np.floor(t_bounds[0].item()))
+#         upper_t_bound = int(np.ceil(t_bounds[1].item()))
+#         fixed_features_list = []
+#         if self.model_type == Policy.Classic:
+#             for size_ratio in range(lower_t_bound, upper_t_bound + 1):
+#                 for pol in range(2):
+#                     fixed_features_list.append({1: size_ratio, 2: pol})
+#         elif self.model_type == Policy.QFixed:
+#             for size_ratio in range(lower_t_bound, upper_t_bound + 1):
+#                 for q in range(1, size_ratio - 1):
+#                     fixed_features_list.append({1: size_ratio, 2: q})
+#         elif self.model_type == Policy.YZHybrid:
+#             for size_ratio in range(lower_t_bound, upper_t_bound + 1):
+#                 for y in range(1, size_ratio - 1):
+#                     for z in range(1, size_ratio - 1):
+#                         fixed_features_list.append({1: size_ratio, 2: y, 3: z})
+#         elif self.model_type == Policy.KHybrid:
+#             for t in range(2, upper_t_bound + 1):
+#                 param_values = [range(1, upper_t_bound)] * self.num_k_values
+#                 for combination in product(*param_values):
+#                     fixed_feature = {1: t}
+#                     fixed_feature.update(
+#                         {i + 2: combination[i] for i in range(len(combination))}
+#                     )
+#                     fixed_features_list.append(fixed_feature)
+#
+#         return fixed_features_list
+#
+#     def evaluate_new_candidates(
+#         self, new_candidates: torch.Tensor
+#     ) -> Tuple[List[LSMDesign], List[float]]:
+#         new_designs = self.create_designs_from_candidates(new_candidates)
+#
+#         costs = [
+#             self.cf.calc_cost(
+#                 design,
+#                 self.system,
+#                 self.workload.z0,
+#                 self.workload.z1,
+#                 self.workload.q,
+#                 self.workload.w,
+#             )
+#             for design in new_designs
+#         ]
+#         assert self.conn is not None
+#         for design, cost in zip(new_designs, costs):
+#             log_design_cost(self.conn, self.run_id, design, cost)
+#
+#         return new_designs, costs
+#
+#     def update_training_data(
+#         self, train_x, train_y, new_candidates, costs, best_designs
+#     ) -> Tuple[torch.Tensor, torch.Tensor, Number, List[Tuple[LSMDesign, Number]]]:
+#         new_target = torch.tensor(costs).unsqueeze(-1)
+#         train_x = torch.cat([train_x, new_candidates])
+#         train_y = torch.cat([train_y, new_target])
+#         best_y = train_y.min().item()
+#         best_designs = self._update_best_designs(
+#             best_designs, new_candidates, new_target
+#         )
+#
+#         return train_x, train_y, best_y, best_designs
+#
+#     def create_designs_from_candidates(
+#         self, candidates: torch.Tensor
+#     ) -> List[LSMDesign]:
+#         new_designs = []
+#         for candidate in candidates:
+#             new_designs += self._generate_new_designs_helper(candidate)
+#
+#         return new_designs
+#
+#     def _generate_new_designs_helper(self, candidate: torch.Tensor) -> List[LSMDesign]:
+#         new_designs = []
+#         h = candidate[0].item()
+#         if h == self.system.H:
+#             h = h - 0.01
+#         if self.model_type == Policy.QFixed:
+#             size_ratio, q_val = candidate[1].item(), candidate[2].item()
+#             policy = Policy.QFixed
+#             new_designs = [
+#                 LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, Q=int(q_val))
+#             ]
+#             # Uncomment the following lines of code if you want the q value to
+#             # be the same through all levels and behave like KLSM
+#             # policy = Policy.KHybrid
+#             # k_values = [q_val for _ in range(1, self.bounds.max_considered_levels)]
+#             # new_designs = [
+#             #     LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, K=k_values)
+#             # ]
+#         elif self.model_type == Policy.YZHybrid:
+#             size_ratio, y_val, z_val = (
+#                 candidate[1].item(),
+#                 candidate[2].item(),
+#                 candidate[3].item(),
+#             )
+#             policy = Policy.YZHybrid
+#             new_designs = [
+#                 LSMDesign(
+#                     h=h,
+#                     T=np.ceil(size_ratio),
+#                     policy=policy,
+#                     Y=int(y_val),
+#                     Z=int(z_val),
+#                 )
+#             ]
+#         elif self.model_type == Policy.KHybrid:
+#             size_ratio = candidate[1].item()
+#             k_values = [cand.item() for cand in candidate[2:]]
+#             policy = Policy.KHybrid
+#             if len(k_values) < self.bounds.max_considered_levels:
+#                 k_values += [1] * (self.bounds.max_considered_levels - len(k_values))
+#             new_designs.append(
+#                 LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, K=k_values)
+#             )
+#         else:
+#             size_ratio, policy_val = candidate[1].item(), candidate[2].item()
+#             policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering
+#             new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)]
+#
+#         return new_designs
+#
+#     def finalize_optimization(self, best_designs):
+#         elapsed_time = time.time() - self.start_time
+#         sorted_designs = sorted(best_designs, key=lambda x: x[1])
+#         analytical_design, analytical_cost = self._find_analytical_results(
+#             self.system, self.workload
+#         )
+#         best_design, best_cost = sorted_designs[0][0], sorted_designs[0][1]
+#         assert self.conn is not None
+#         log_run_details(
+#             self.conn,
+#             self.run_id,
+#             elapsed_time,
+#             analytical_cost,
+#             best_cost,
+#             analytical_design,
+#             best_design,
+#         )
+#         self.conn.close()
+#
+#         return best_design, best_cost, elapsed_time
+#
+#     def get_next_points(
+#         self,
+#         x: torch.Tensor,
+#         y: torch.Tensor,
+#         best_y: float,
+#         bounds: torch.Tensor,
+#         fixed_features_list: List,
+#         acquisition_function: str = "ExpectedImprovement",
+#         n_points: int = 1,
+#     ) -> torch.Tensor:
+#         if self.model_type == Policy.QFixed or self.model_type == Policy.Classic:
+#             single_model = MixedSingleTaskGP(
+#                 x,
+#                 y,
+#                 cat_dims=[1, 2],
+#                 input_transform=Normalize(d=x.shape[1], bounds=bounds),
+#                 outcome_transform=Standardize(m=1),
+#             )
+#         elif self.model_type == Policy.YZHybrid:
+#             single_model = MixedSingleTaskGP(
+#                 x,
+#                 y,
+#                 cat_dims=[1, 2, 3],
+#                 input_transform=Normalize(d=x.shape[1], bounds=bounds),
+#                 outcome_transform=Standardize(m=1),
+#             )
+#         elif self.model_type == Policy.KHybrid:
+#             # self.num_k_values is the number of categorical K values the
+#             # model predicts out of self.max_levels. The +2 accounts for the
+#             # first two indices, which hold the 'h' and 'T' values, so every
+#             # index from 1 up to num_k_values + 2 is treated as categorical.
+#             cat_dims = list(range(1, self.num_k_values + 2))
+#             single_model = MixedSingleTaskGP(
+#                 x,
+#                 y,
+#                 cat_dims=cat_dims,
+#                 input_transform=Normalize(d=x.shape[1], bounds=bounds),
+#                 outcome_transform=Standardize(m=1),
+#             )
+#         else:
+#             raise ValueError(f"Unsupported model type: {self.model_type}")
+#         mll = ExactMarginalLogLikelihood(single_model.likelihood, single_model)
+#         fit_gpytorch_model(mll)
+#         if acquisition_function == "ExpectedImprovement":
+#             acqf = ExpectedImprovement(
+#                 model=single_model, best_f=best_y, maximize=False
+#             )
+#         elif acquisition_function == "UpperConfidenceBound":
+#             beta = self.jconfig["beta_value"]
+#             acqf = UpperConfidenceBound(model=single_model, beta=beta, maximize=False)
+#         elif acquisition_function == "qExpectedImprovement":
+#             # qExpectedImprovement exposes no maximize flag, so the negated
+#             # best_f below stands in for minimization.
+#             acqf = qExpectedImprovement(model=single_model, best_f=-best_y)
+#         else:
+#             raise ValueError(f"Unknown acquisition function: {acquisition_function}")
+#
+#         candidates, _ = optimize_acqf_mixed(
+#             acq_function=acqf,
+#             bounds=bounds,
+#             q=n_points,
+#             num_restarts=self.num_restarts,
+#             raw_samples=self.jconfig["raw_samples"],
+#             fixed_features_list=fixed_features_list,
+#         )
+#         return candidates
+#
+#     def _generate_initial_data(
+#         self, n: int = 30
+#     ) -> Tuple[torch.Tensor, torch.Tensor, Number]:
+#         train_x = []
+#         train_y = []
+#
+#         generator_class = Gen.get_generator(self.model_type)
+#         generator = generator_class(self.bounds)
+#
+#         for _ in range(n):
+#             design = generator._sample_design(self.system)
+#             x_vals = np.array([design.h, design.T])
+#             if self.model_type == Policy.Classic:
+#                 policy = 0 if design.policy == Policy.Tiering else 1
+#                 x_vals = np.concatenate((x_vals, [policy]))
+#             elif self.model_type == Policy.QFixed:
+#                 x_vals = np.concatenate((x_vals, [design.Q]))
+#             elif self.model_type == Policy.YZHybrid:
+#                 x_vals = np.concatenate((x_vals, [design.Y, design.Z]))
+#             elif self.model_type == Policy.KHybrid:
+#                 k_values_padded = design.K + [1] * self.num_k_values
+#                 k_values_padded = k_values_padded[: self.num_k_values]
+#                 x_vals = np.concatenate((x_vals, k_values_padded))
+#             cost = self.cf.calc_cost(
+#                 design,
+#                 self.system,
+#                 self.workload.z0,
+#                 self.workload.z1,
+#                 self.workload.q,
+#                 self.workload.w,
+#             )
+#             assert self.conn is not None
+#             log_design_cost(self.conn, self.run_id, design, cost)
+#             train_x.append(x_vals)
+#             train_y.append(cost)
+#
+#         train_x = np.array(train_x)
+#         train_x = torch.tensor(train_x)
+#         train_y = torch.tensor(train_y, dtype=torch.float64).unsqueeze(-1)
+#         best_y = train_y.min().item()
+#
+#         return train_x, train_y, best_y
+#
+#     def _update_best_designs(
+#         self,
+#         best_designs: List[Tuple[LSMDesign, float]],
+#         new_x: torch.Tensor,
+#         new_y: torch.Tensor,
+#     ) -> List[Tuple[LSMDesign, float]]:
+#         for x, y in zip(new_x, new_y):
+#             kwargs = {
+#                 "h": x[0].item(),
+#                 "T": np.ceil(x[1].item()),
+#                 "policy": self.model_type,
+#             }
+#             if self.model_type == Policy.QFixed:
+#                 kwargs["Q"] = x[2].item()
+#             elif self.model_type == Policy.YZHybrid:
+#                 kwargs["Y"] = x[2].item()
+#                 kwargs["Z"] = x[3].item()
+#             elif self.model_type == Policy.KHybrid:
+#                 kwargs["K"] = x[2:].tolist()
+#             else:  # self.model_type == Policy.Classic
+#                 pol = Policy.Leveling if x[2].item() < 0.5 else Policy.Tiering
+#                 kwargs["policy"] = pol
+#             best_designs.append((LSMDesign(**kwargs), y.item()))
+#
+#         return best_designs
+#
+#     def _find_analytical_results(
+#         self, system: System, workload: Workload, bounds: Optional[LSMBounds] = None
+#     ) -> Tuple[LSMDesign, float]:
+#         bounds = bounds if bounds is not None else self.bounds
+#         if self.model_type == Policy.Classic:
+#             solver = Solver.ClassicSolver(bounds)
+#         elif self.model_type == Policy.QFixed:
+#             solver = Solver.QLSMSolver(bounds)
+#         elif self.model_type == Policy.YZHybrid:
+#             solver = Solver.YZLSMSolver(bounds)
+#         elif self.model_type == Policy.KHybrid:
+#             solver = Solver.KLSMSolver(bounds)
+#         else:
+#             raise KeyError(f"Solver for {self.model_type} not implemented")
+#
+#         z0, z1, q, w = workload.z0, workload.z1, workload.q, workload.w
+#         opt_design, _ = solver.get_nominal_design(system, z0, z1, q, w)
+#
+#         if self.model_type == Policy.Classic:
+#             x = np.array([opt_design.h, opt_design.T])
+#             policy = opt_design.policy
+#             assert isinstance(solver, Solver.ClassicSolver)
+#             cost = solver.nominal_objective(x, policy, system, z0, z1, q, w)
+#         elif self.model_type == Policy.QFixed:
+#             x = np.array([opt_design.h, opt_design.T, opt_design.Q])
+#             assert isinstance(solver, Solver.QLSMSolver)
+#             cost = solver.nominal_objective(x, system, z0, z1, q, w)
+#         elif self.model_type == Policy.YZHybrid:
+#             x = np.array([opt_design.h, opt_design.T, opt_design.Y, opt_design.Z])
+#             assert isinstance(solver, Solver.YZLSMSolver)
+#             cost = solver.nominal_objective(x, system, z0, z1, q, w)
+#         elif self.model_type == Policy.KHybrid:
+#             x = np.array([opt_design.h, opt_design.T] + opt_design.K)
+#             assert isinstance(solver, Solver.KLSMSolver)
+#             cost = solver.nominal_objective(x, system, z0, z1, q, w)
+#         else:
+#             raise KeyError(f"Unknown model type {self.model_type}")
+#
+#         print("Cost for the nominal design using analytical solver: ", cost)
+#         print("Nominal Design suggested by analytical solver: ", opt_design)
+#
+#         return opt_design, cost
+#
+#
+# if __name__ == "__main__":
+#     config = Reader.read_config("axe.toml")
+#
+#     log = logging.getLogger(config["log"]["name"])
+#     log.info("Initializing Bayesian Optimization Job")
+#
+#     bayesian_optimizer = BayesianPipeline(config)
+#     bayesian_optimizer.run()
diff --git a/jobs/create_tunings.py b/jobs/create_tunings.py
index 95e797c..e4f53b4 100644
--- a/jobs/create_tunings.py
+++ b/jobs/create_tunings.py
@@ -4,9 +4,9 @@
 import numpy as np
 import pandas as pd
 
-from endure.lcm.data.generator import LCMDataGenerator
-import endure.lsm.solver as Solvers
-from endure.lsm.types import Policy
+from axe.lcm.data.generator import LCMDataGenerator
+import axe.lsm.solver as Solvers
+from axe.lsm.types import Policy
 
 
 class CreateTuningsJob:
diff --git a/jobs/data_gen.py b/jobs/data_gen.py
index 011d823..5b7a6bc 100644
--- a/jobs/data_gen.py
+++ b/jobs/data_gen.py
@@ -8,10 +8,10 @@
 import pyarrow as pa
 import pyarrow.parquet as pq
 
-from endure.data.io import Reader
-from endure.lsm.types import Policy, LSMBounds
-from endure.ltune.data.generator import LTuneDataGenerator
-import endure.lcm.data.generator as Generators
+from axe.data.io import Reader
+from axe.lsm.types import Policy, LSMBounds
+from
axe.ltune.data.generator import LTuneDataGenerator +import axe.lcm.data.generator as Generators class DataGenJob: diff --git a/jobs/infra/bo_job_runs.py b/jobs/infra/bo_job_runs.py index 978f083..10a29da 100644 --- a/jobs/infra/bo_job_runs.py +++ b/jobs/infra/bo_job_runs.py @@ -6,10 +6,10 @@ sys.path.append(os.path.join(sys.path[0], "../../")) -from endure.lsm.types import LSMBounds, Workload -from endure.lcm.data.generator import LCMDataGenerator +from axe.lsm.types import LSMBounds, Workload +from axe.lcm.data.generator import LCMDataGenerator from jobs.bayesian_pipeline import BayesianPipeline -from endure.lsm.cost import EndureCost +from axe.lsm.cost import EndureCost def to_cuda(obj, seen=None): @@ -96,7 +96,7 @@ def compare_designs(n_runs=100, csv_filename="yz_design_comparison.csv"): if __name__ == "__main__": file_dir = os.path.dirname(__file__) - config_path = os.path.join(file_dir, "endure.toml") + config_path = os.path.join(file_dir, "axe.toml") with open(config_path) as fid: config = toml.load(fid) bayesian_optimizer = BayesianPipeline(config) diff --git a/jobs/infra/db_log.py b/jobs/infra/db_log.py index ff1b6c5..42392ee 100644 --- a/jobs/infra/db_log.py +++ b/jobs/infra/db_log.py @@ -1,6 +1,6 @@ import sqlite3 -from endure.data.io import Reader -from endure.lsm.types import LSMDesign, Policy, System, Workload +from axe.data.io import Reader +from axe.lsm.types import LSMDesign, Policy, System, Workload def initialize_database(db_path: str = "cost_log.db") -> sqlite3.Connection: @@ -227,6 +227,6 @@ def log_run_details( if __name__ == "__main__": - config = Reader.read_config("endure.toml") + config = Reader.read_config("axe.toml") conn = initialize_database() conn.close() diff --git a/jobs/lcm_hyperparam_tune.py b/jobs/lcm_hyperparam_tune.py index 131eef2..8f18273 100755 --- a/jobs/lcm_hyperparam_tune.py +++ b/jobs/lcm_hyperparam_tune.py @@ -1,218 +1,218 @@ -#!/usr/bin/env python -from typing import Any -import toml -import logging -import os -import tempfile -from endure.util.lr_scheduler import LRSchedulerBuilder - -import ray -import ray.train as RayTrain -import ray.tune as RayTune -from ray.tune.schedulers import ASHAScheduler -import torch -from torch.utils.data import DataLoader - -from endure.lcm.data.dataset import LCMDataSet -from endure.lcm.model.builder import LearnedCostModelBuilder -from endure.lsm.types import STR_POLICY_DICT, Policy -from endure.util.losses import LossBuilder -from endure.util.optimizer import OptimizerBuilder - - -def build_train(cfg, lsm_design: Policy) -> LCMDataSet: - train_dir: str = os.path.join( - cfg["io"]["data_dir"], - cfg["job"]["LCMTrain"]["train"]["dir"], - ) - train = LCMDataSet( - folder=train_dir, - lsm_design=lsm_design, - min_size_ratio=cfg["lsm"]["size_ratio"]["min"], - max_size_ratio=cfg["lsm"]["size_ratio"]["max"], - max_levels=cfg["lsm"]["max_levels"], - test=False, - shuffle=cfg["job"]["LCMTrain"]["train"]["shuffle"], - ) - - return train - - -def build_validate(cfg, lsm_design: Policy) -> LCMDataSet: - validate_dir = os.path.join( - cfg["io"]["data_dir"], - cfg["job"]["LCMTrain"]["test"]["dir"], - ) - validate = LCMDataSet( - folder=validate_dir, - lsm_design=lsm_design, - min_size_ratio=cfg["lsm"]["size_ratio"]["min"], - max_size_ratio=cfg["lsm"]["size_ratio"]["max"], - max_levels=cfg["lsm"]["max_levels"], - test=True, - shuffle=cfg["job"]["LCMTrain"]["test"]["shuffle"], - ) - - return validate - - -def train_lcm(cfg: dict[str, Any]): - lsm_choice = STR_POLICY_DICT.get(cfg["lsm"]["design"], Policy.KHybrid) - 
size_ratio_min = cfg["lsm"]["size_ratio"]["min"] - size_ratio_max = cfg["lsm"]["size_ratio"]["max"] - net_builder = LearnedCostModelBuilder( - size_ratio_range=(size_ratio_min, size_ratio_max), - max_levels=cfg["lsm"]["max_levels"], - **cfg["lcm"]["model"], - ) - net = net_builder.build_model(lsm_choice) - - device = "cpu" - if torch.cuda.is_available(): - device = "cuda:0" - net.to(device) - - criterion = LossBuilder(cfg).build(cfg["job"]["LCMTrain"]["loss_fn"]) - if criterion is None: - raise TypeError(f"Loss choice invalid: {cfg['job']['LCMTrain']['loss_fn']}") - assert criterion is not None - optimizer = OptimizerBuilder(cfg).build_optimizer( - cfg["job"]["LCMTrain"]["optimizer"], net - ) - scheduler = LRSchedulerBuilder(cfg).build_scheduler( - optimizer, cfg["job"]["LCMTrain"]["lr_scheduler"] - ) - - if RayTrain.get_checkpoint(): - loaded_checkpoint = RayTrain.get_checkpoint() - with loaded_checkpoint.as_directory() as loaded_checkpoint_dir: - model_state, optimizer_state = torch.load( - os.path.join(loaded_checkpoint_dir, "checkpoint.pt") - ) - net.load_state_dict(model_state) - optimizer.load_state_dict(optimizer_state) - - train_data = build_train(cfg, lsm_choice) - train_set = DataLoader( - train_data, - batch_size=cfg["job"]["LCMTrain"]["train"]["batch_size"], - drop_last=cfg["job"]["LCMTrain"]["train"]["drop_last"], - num_workers=cfg["job"]["LCMTrain"]["train"]["num_workers"], - pin_memory=True, - ) - validate_data = build_validate(cfg, lsm_choice) - validate_set = DataLoader( - validate_data, - batch_size=cfg["job"]["LCMTrain"]["test"]["batch_size"], - drop_last=cfg["job"]["LCMTrain"]["test"]["drop_last"], - num_workers=cfg["job"]["LCMTrain"]["test"]["num_workers"], - pin_memory=True, - ) - - for epoch in range(20): # loop over the dataset multiple times - running_loss = 0.0 - epoch_steps = 0 - net.train() - for i, data in enumerate(train_set): - # get the inputs; data is a list of [inputs, labels] - labels, feats = data - labels, feats = labels.to(device), feats.to(device) - - # zero the parameter gradients - optimizer.zero_grad() - - # forward + backward + optimize - pred = net(feats) - loss = criterion(pred, labels) - loss.backward() - optimizer.step() - - # print statistics - running_loss += loss.item() - epoch_steps += 1 - if i % 2000 == 1999: # print every 2000 mini-batches - print( - "[%d, %5d] loss: %.3f" - % (epoch + 1, i + 1, running_loss / epoch_steps) - ) - running_loss = 0.0 - if scheduler is not None: - scheduler.step() - - # Validation loss - val_loss = 0.0 - val_steps = 0 - net.eval() - for i, data in enumerate(validate_set, 0): - with torch.no_grad(): - labels, feats = data - labels, feats = labels.to(device), feats.to(device) - - pred = net(feats) - loss = criterion(pred, labels) - val_loss += loss.cpu().numpy() - val_steps += 1 - - with tempfile.TemporaryDirectory() as temp_checkpoint_dir: - path = os.path.join(temp_checkpoint_dir, "checkpoint.pt") - torch.save((net.state_dict(), optimizer.state_dict()), path) - checkpoint = RayTrain.Checkpoint.from_directory(temp_checkpoint_dir) - RayTrain.report( - {"loss": (val_loss / val_steps)}, - checkpoint=checkpoint, - ) - print("Finished Training") - - -def main(): - from endure.data.io import Reader - - config = Reader.read_config("endure.toml") - - logging.basicConfig( - format=config["log"]["format"], datefmt=config["log"]["datefmt"] - ) - - log = logging.getLogger(config["log"]["name"]) - log.setLevel(config["log"]["level"]) - - config["lcm"]["model"]["embedding_size"] = RayTune.choice([4, 8]) - 
config["lcm"]["model"]["hidden_length"] = RayTune.choice([2, 3, 4]) - config["lcm"]["model"]["hidden_width"] = RayTune.choice([32, 64, 128]) - config["train"]["optimizer"]["Adam"]["lr"] = RayTune.loguniform(1e-4, 1e-1) - config["job"]["LCMTrain"]["lr_scheduler"] = RayTune.choice( - ["CosineAnnealing", "Constant"] - ) - config["job"]["LCMTrain"]["train"]["batch_size"] = RayTune.choice( - [1024, 2048, 4096, 8192, 16384] - ) - scheduler = ASHAScheduler(grace_period=3, max_t=20, reduction_factor=2) - - ray.init(num_gpus=1) - - tuner = RayTune.Tuner( - RayTune.with_resources( - RayTune.with_parameters(train_lcm), - resources={"cpu": 4, "gpu": 0}, - ), - tune_config=RayTune.TuneConfig( - metric="loss", - mode="min", - scheduler=scheduler, - num_samples=2, - ), - param_space=config, - ) - results = tuner.fit() - best_result = results.get_best_result("loss", "min") - assert best_result.config is not None - assert best_result.metrics is not None - - print("Best trial config: {}".format(best_result.config)) - with open('best.toml', "w") as fid: - toml.dump(best_result.config, fid) - print("Best trial final validation loss: {}".format(best_result.metrics["loss"])) - - -if __name__ == "__main__": - main() +# #!/usr/bin/env python +# from typing import Any +# import toml +# import logging +# import os +# import tempfile +# from axe.util.lr_scheduler import LRSchedulerBuilder +# +# import ray +# import ray.train as RayTrain +# import ray.tune as RayTune +# from ray.tune.schedulers import ASHAScheduler +# import torch +# from torch.utils.data import DataLoader +# +# from axe.lcm.data.dataset import LCMDataSet +# from axe.lcm.model.builder import LearnedCostModelBuilder +# from axe.lsm.types import STR_POLICY_DICT, Policy +# from axe.util.losses import LossBuilder +# from axe.util.optimizer import OptimizerBuilder +# +# +# def build_train(cfg, lsm_design: Policy) -> LCMDataSet: +# train_dir: str = os.path.join( +# cfg["io"]["data_dir"], +# cfg["job"]["LCMTrain"]["train"]["dir"], +# ) +# train = LCMDataSet( +# folder=train_dir, +# lsm_design=lsm_design, +# min_size_ratio=cfg["lsm"]["size_ratio"]["min"], +# max_size_ratio=cfg["lsm"]["size_ratio"]["max"], +# max_levels=cfg["lsm"]["max_levels"], +# test=False, +# shuffle=cfg["job"]["LCMTrain"]["train"]["shuffle"], +# ) +# +# return train +# +# +# def build_validate(cfg, lsm_design: Policy) -> LCMDataSet: +# validate_dir = os.path.join( +# cfg["io"]["data_dir"], +# cfg["job"]["LCMTrain"]["test"]["dir"], +# ) +# validate = LCMDataSet( +# folder=validate_dir, +# lsm_design=lsm_design, +# min_size_ratio=cfg["lsm"]["size_ratio"]["min"], +# max_size_ratio=cfg["lsm"]["size_ratio"]["max"], +# max_levels=cfg["lsm"]["max_levels"], +# test=True, +# shuffle=cfg["job"]["LCMTrain"]["test"]["shuffle"], +# ) +# +# return validate +# +# +# def train_lcm(cfg: dict[str, Any]): +# lsm_choice = STR_POLICY_DICT.get(cfg["lsm"]["design"], Policy.KHybrid) +# size_ratio_min = cfg["lsm"]["size_ratio"]["min"] +# size_ratio_max = cfg["lsm"]["size_ratio"]["max"] +# net_builder = LearnedCostModelBuilder( +# size_ratio_range=(size_ratio_min, size_ratio_max), +# max_levels=cfg["lsm"]["max_levels"], +# **cfg["lcm"]["model"], +# ) +# net = net_builder.build_model(lsm_choice) +# +# device = "cpu" +# if torch.cuda.is_available(): +# device = "cuda:0" +# net.to(device) +# +# criterion = LossBuilder(cfg).build(cfg["job"]["LCMTrain"]["loss_fn"]) +# if criterion is None: +# raise TypeError(f"Loss choice invalid: {cfg['job']['LCMTrain']['loss_fn']}") +# assert criterion is not None +# optimizer = 
OptimizerBuilder(cfg).build_optimizer( +# cfg["job"]["LCMTrain"]["optimizer"], net +# ) +# scheduler = LRSchedulerBuilder(cfg).build_scheduler( +# optimizer, cfg["job"]["LCMTrain"]["lr_scheduler"] +# ) +# +# if RayTrain.get_checkpoint(): +# loaded_checkpoint = RayTrain.get_checkpoint() +# with loaded_checkpoint.as_directory() as loaded_checkpoint_dir: +# model_state, optimizer_state = torch.load( +# os.path.join(loaded_checkpoint_dir, "checkpoint.pt") +# ) +# net.load_state_dict(model_state) +# optimizer.load_state_dict(optimizer_state) +# +# train_data = build_train(cfg, lsm_choice) +# train_set = DataLoader( +# train_data, +# batch_size=cfg["job"]["LCMTrain"]["train"]["batch_size"], +# drop_last=cfg["job"]["LCMTrain"]["train"]["drop_last"], +# num_workers=cfg["job"]["LCMTrain"]["train"]["num_workers"], +# pin_memory=True, +# ) +# validate_data = build_validate(cfg, lsm_choice) +# validate_set = DataLoader( +# validate_data, +# batch_size=cfg["job"]["LCMTrain"]["test"]["batch_size"], +# drop_last=cfg["job"]["LCMTrain"]["test"]["drop_last"], +# num_workers=cfg["job"]["LCMTrain"]["test"]["num_workers"], +# pin_memory=True, +# ) +# +# for epoch in range(20): # loop over the dataset multiple times +# running_loss = 0.0 +# epoch_steps = 0 +# net.train() +# for i, data in enumerate(train_set): +# # get the inputs; data is a list of [inputs, labels] +# labels, feats = data +# labels, feats = labels.to(device), feats.to(device) +# +# # zero the parameter gradients +# optimizer.zero_grad() +# +# # forward + backward + optimize +# pred = net(feats) +# loss = criterion(pred, labels) +# loss.backward() +# optimizer.step() +# +# # print statistics +# running_loss += loss.item() +# epoch_steps += 1 +# if i % 2000 == 1999: # print every 2000 mini-batches +# print( +# "[%d, %5d] loss: %.3f" +# % (epoch + 1, i + 1, running_loss / epoch_steps) +# ) +# running_loss = 0.0 +# if scheduler is not None: +# scheduler.step() +# +# # Validation loss +# val_loss = 0.0 +# val_steps = 0 +# net.eval() +# for i, data in enumerate(validate_set, 0): +# with torch.no_grad(): +# labels, feats = data +# labels, feats = labels.to(device), feats.to(device) +# +# pred = net(feats) +# loss = criterion(pred, labels) +# val_loss += loss.cpu().numpy() +# val_steps += 1 +# +# with tempfile.TemporaryDirectory() as temp_checkpoint_dir: +# path = os.path.join(temp_checkpoint_dir, "checkpoint.pt") +# torch.save((net.state_dict(), optimizer.state_dict()), path) +# checkpoint = RayTrain.Checkpoint.from_directory(temp_checkpoint_dir) +# RayTrain.report( +# {"loss": (val_loss / val_steps)}, +# checkpoint=checkpoint, +# ) +# print("Finished Training") +# +# +# def main(): +# from axe.data.io import Reader +# +# config = Reader.read_config("axe.toml") +# +# logging.basicConfig( +# format=config["log"]["format"], datefmt=config["log"]["datefmt"] +# ) +# +# log = logging.getLogger(config["log"]["name"]) +# log.setLevel(config["log"]["level"]) +# +# config["lcm"]["model"]["embedding_size"] = RayTune.choice([4, 8]) +# config["lcm"]["model"]["hidden_length"] = RayTune.choice([2, 3, 4]) +# config["lcm"]["model"]["hidden_width"] = RayTune.choice([32, 64, 128]) +# config["train"]["optimizer"]["Adam"]["lr"] = RayTune.loguniform(1e-4, 1e-1) +# config["job"]["LCMTrain"]["lr_scheduler"] = RayTune.choice( +# ["CosineAnnealing", "Constant"] +# ) +# config["job"]["LCMTrain"]["train"]["batch_size"] = RayTune.choice( +# [1024, 2048, 4096, 8192, 16384] +# ) +# scheduler = ASHAScheduler(grace_period=3, max_t=20, reduction_factor=2) +# +# ray.init(num_gpus=1) 
+# +# tuner = RayTune.Tuner( +# RayTune.with_resources( +# RayTune.with_parameters(train_lcm), +# resources={"cpu": 4, "gpu": 0}, +# ), +# tune_config=RayTune.TuneConfig( +# metric="loss", +# mode="min", +# scheduler=scheduler, +# num_samples=2, +# ), +# param_space=config, +# ) +# results = tuner.fit() +# best_result = results.get_best_result("loss", "min") +# assert best_result.config is not None +# assert best_result.metrics is not None +# +# print("Best trial config: {}".format(best_result.config)) +# with open('best.toml', "w") as fid: +# toml.dump(best_result.config, fid) +# print("Best trial final validation loss: {}".format(best_result.metrics["loss"])) +# +# +# if __name__ == "__main__": +# main() diff --git a/jobs/lcm_train.py b/jobs/lcm_train.py index ca2f467..d2768c5 100755 --- a/jobs/lcm_train.py +++ b/jobs/lcm_train.py @@ -8,13 +8,13 @@ import torch import torch.optim as TorchOpt -from endure.lcm.data.dataset import LCMDataSet -from endure.lcm.model.builder import LearnedCostModelBuilder -from endure.lsm.types import LSMBounds, Policy -from endure.util.losses import LossBuilder -from endure.util.lr_scheduler import LRSchedulerBuilder -from endure.util.optimizer import OptimizerBuilder -from endure.util.trainer import Trainer +from axe.lcm.data.dataset import LCMDataSet +from axe.lcm.model.builder import LearnedCostModelBuilder +from axe.lsm.types import LSMBounds, Policy +from axe.util.losses import LossBuilder +from axe.util.lr_scheduler import LRSchedulerBuilder +from axe.util.optimizer import OptimizerBuilder +from axe.util.trainer import Trainer class LCMTrainJob: @@ -123,7 +123,7 @@ def _make_save_dir(self) -> Optional[str]: except FileExistsError: return None - with open(os.path.join(save_dir, "endure.toml"), "w") as fid: + with open(os.path.join(save_dir, "axe.toml"), "w") as fid: toml.dump(self.config, fid) return save_dir @@ -162,9 +162,9 @@ def run(self) -> Optional[Trainer]: if __name__ == "__main__": - from endure.data.io import Reader + from axe.data.io import Reader - config = Reader.read_config("endure.toml") + config = Reader.read_config("axe.toml") logging.basicConfig( format=config["log"]["format"], datefmt=config["log"]["datefmt"], diff --git a/jobs/ltune_train.py b/jobs/ltune_train.py index 093e954..49a954f 100644 --- a/jobs/ltune_train.py +++ b/jobs/ltune_train.py @@ -8,13 +8,13 @@ import torch.optim as Opt from torch.utils.data import DataLoader -from endure.lsm.types import LSMBounds, Policy -from endure.ltune.data.dataset import LTuneDataSet -from endure.ltune.loss import LearnedCostModelLoss -from endure.ltune.model.builder import LTuneModelBuilder -from endure.util.lr_scheduler import LRSchedulerBuilder -from endure.util.optimizer import OptimizerBuilder -from endure.util.trainer import Trainer +from axe.lsm.types import LSMBounds, Policy +from axe.ltune.data.dataset import LTuneDataSet +from axe.ltune.loss import LearnedCostModelLoss +from axe.ltune.model.builder import LTuneModelBuilder +from axe.util.lr_scheduler import LRSchedulerBuilder +from axe.util.optimizer import OptimizerBuilder +from axe.util.trainer import Trainer class LTuneTrainJob: @@ -110,7 +110,7 @@ def _make_save_dir(self) -> bool: except FileExistsError: return False - with open(os.path.join(self.save_dir, "endure.toml"), "w") as fid: + with open(os.path.join(self.save_dir, "axe.toml"), "w") as fid: toml.dump(self.config, fid) return True @@ -182,9 +182,9 @@ def run(self) -> Optional[Trainer]: return trainer def main(): - from endure.data.io import Reader + from axe.data.io import 
Reader - config = Reader.read_config("endure.toml") + config = Reader.read_config("axe.toml") logging.basicConfig( format=config["log"]["format"], datefmt=config["log"]["datefmt"] ) diff --git a/jobs/mlos_bo.py b/jobs/mlos_bo.py index 79c6f92..fab27d4 100644 --- a/jobs/mlos_bo.py +++ b/jobs/mlos_bo.py @@ -1,16 +1,14 @@ -import os -import toml import logging import ConfigSpace as CS import numpy as np import pandas as pd import mlos_core.optimizers -from endure.lsm.cost import EndureCost -from endure.lcm.data.generator import KHybridGenerator -from endure.lsm.solver import KLSMSolver -from endure.lsm.types import LSMDesign, System, Policy, Workload, LSMBounds -from endure.data.io import Reader +from axe.lsm.cost import EndureCost +from axe.lcm.data.generator import KHybridGenerator +from axe.lsm.solver import KLSMSolver +from axe.lsm.types import LSMDesign, System, Policy, Workload, LSMBounds +from axe.data.io import Reader def export_to_csv(mlos_costs, analytical_costs, mlos_designs, analytical_designs, systems, workloads) -> None: @@ -154,7 +152,7 @@ def find_analytical_results(system: System, workload: Workload, bounds: LSMBound if __name__ == "__main__": - config = Reader.read_config("endure.toml") + config = Reader.read_config("axe.toml") log = logging.getLogger(config["log"]["name"]) log.info("Initializing Bayesian Optimization Job") diff --git a/jobs/mlos_exp_runs.py b/jobs/mlos_exp_runs.py index 1905bf4..92aff6d 100755 --- a/jobs/mlos_exp_runs.py +++ b/jobs/mlos_exp_runs.py @@ -4,9 +4,9 @@ import ConfigSpace as CS import numpy as np import pandas as pd -from endure.lcm.data.generator import KHybridGenerator, ClassicGenerator, YZCostGenerator -from endure.lsm.cost import EndureCost -from endure.lsm.types import LSMBounds, LSMDesign, Policy, System, Workload +from axe.lcm.data.generator import KHybridGenerator, ClassicGenerator, YZCostGenerator +from axe.lsm.cost import EndureCost +from axe.lsm.types import LSMBounds, LSMDesign, Policy, System, Workload from mlos_core.optimizers import SmacOptimizer NUM_ROUNDS = 100 @@ -37,14 +37,16 @@ def _suggest_to_design(self, suggestion: pd.DataFrame) -> LSMDesign: policy = Policy.Tiering else: policy = Policy.Leveling - return LSMDesign( - h=bits_per_element, T=size_ratio, policy=policy - ) + return LSMDesign(h=bits_per_element, T=size_ratio, policy=policy) elif self.model_type == Policy.YZHybrid: y_val: int = suggestion["y_val"].values[0] z_val: int = suggestion["z_val"].values[0] return LSMDesign( - h=bits_per_element, T=size_ratio, policy=Policy.YZHybrid, Y=y_val, Z=z_val + h=bits_per_element, + T=size_ratio, + policy=Policy.YZHybrid, + Y=y_val, + Z=z_val, ) elif self.model_type == Policy.KHybrid: kaps: np.ndarray = suggestion[[f"kap_{idx}" for idx in range(20)]].values[0] @@ -79,12 +81,12 @@ def _create_parameter_space(self, system: System) -> CS.ConfigurationSpace: CS.UniformIntegerHyperparameter( name="y_val", lower=1, - upper=self.bounds.size_ratio_range[1]-1, + upper=self.bounds.size_ratio_range[1] - 1, ), CS.UniformIntegerHyperparameter( name="z_val", lower=1, - upper=self.bounds.size_ratio_range[1]-1, + upper=self.bounds.size_ratio_range[1] - 1, ), ] parameters = norm_params + yz_params @@ -134,7 +136,7 @@ def _train_model( return def run(self) -> None: - system = System() + system = System() self.db.create_tables() for rep_wl in self.config["workloads"]: workload = Workload( @@ -163,7 +165,7 @@ def __init__(self, config: dict, db_path: str = "testing_yz.db") -> None: def create_tables(self) -> None: cursor = self.connector.cursor() - 
+ cursor.execute( """ CREATE TABLE IF NOT EXISTS environments ( @@ -181,7 +183,7 @@ def create_tables(self) -> None: ); """ ) - + tunings_cols_comm = """ idx INTEGER PRIMARY KEY AUTOINCREMENT, env_id INTEGER, @@ -190,7 +192,7 @@ def create_tables(self) -> None: bits_per_elem REAL, size_ratio INTEGER, cost REAL""" - + if self.model_type == Policy.Classic: policy_field = "policy TEXT" elif self.model_type == Policy.YZHybrid: @@ -205,14 +207,12 @@ def create_tables(self) -> None: {key_string} ); """ - + cursor.execute(create_tunings_table_query) self.connector.commit() cursor.close() - - return - + return def log_workload(self, workload: Workload, system: System) -> int: cursor = self.connector.cursor() @@ -245,7 +245,7 @@ def log_workload(self, workload: Workload, system: System) -> int: ), ) self.connector.commit() - + assert cursor.lastrowid is not None return cursor.lastrowid @@ -287,7 +287,15 @@ def log_round( cost ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, - (workload_id, trial, round, design.h, int(design.T), int(design.Y), int(design.Z)) + ( + workload_id, + trial, + round, + design.h, + int(design.T), + int(design.Y), + int(design.Z), + ) + (cost,), ) else: @@ -308,12 +316,17 @@ def log_round( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ) """, - (workload_id, trial, round, design.h, int(design.T),) + ( + workload_id, + trial, + round, + design.h, + int(design.T), + ) + tuple(design.K) + (cost,), ) - - + self.connector.commit() - + return diff --git a/test/test_klsm_model.py b/test/test_klsm_model.py index 1de6038..6ed9db7 100644 --- a/test/test_klsm_model.py +++ b/test/test_klsm_model.py @@ -1,8 +1,8 @@ import pytest import torch -from endure.lcm.util import one_hot_lcm -from endure.lcm.model import KapModel +from axe.lcm.util import one_hot_lcm +from axe.lcm.model import KapModel OUT_WIDTH = 4 diff --git a/test/test_qlsm_model.py b/test/test_qlsm_model.py index d169561..613b429 100644 --- a/test/test_qlsm_model.py +++ b/test/test_qlsm_model.py @@ -1,8 +1,8 @@ import pytest import torch -from endure.lcm.util import one_hot_lcm -from endure.lcm.model import QModel +from axe.lcm.util import one_hot_lcm +from axe.lcm.model import QModel OUT_WIDTH = 4 diff --git a/test/test_qlsm_tuner.py b/test/test_qlsm_tuner.py index 39685ed..7ab39b5 100644 --- a/test/test_qlsm_tuner.py +++ b/test/test_qlsm_tuner.py @@ -1,7 +1,7 @@ import pytest import torch -from endure.ltune.model.qlsm_tuner import QLSMTuner +from axe.ltune.model.qlsm_tuner import QLSMTuner @pytest.mark.parametrize("num_feats", [3])