From 6b6d572d5d3eb1629ecf944d987169c1d939ee96 Mon Sep 17 00:00:00 2001 From: Niko Yasui Date: Thu, 31 Oct 2024 16:07:41 -0600 Subject: [PATCH 1/4] feat(Scheduler): make scheduler pass results-path to main experiment.py file Fixes: #23 --- ml_experiment/Scheduler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ml_experiment/Scheduler.py b/ml_experiment/Scheduler.py index 8d8d5a4..de40ec1 100644 --- a/ml_experiment/Scheduler.py +++ b/ml_experiment/Scheduler.py @@ -41,6 +41,7 @@ def __init__(self, exp_name: str, seeds: list[int], entry: str, version: Version self.seeds = seeds self.entry = entry self.base_path = base or os.getcwd() + self.results_path = os.path.join(self.base_path, 'results', self.exp_name) self.version = version if version is not None else -1 self.all_runs = set[RunSpec]() # TODO: polars dataframe! @@ -51,11 +52,11 @@ def __repr__(self): return f'Scheduler({self.exp_name}, {self.seeds}, {self.version}, {self.all_runs})' def get_all_runs(self) -> Self: - res_path = os.path.join(self.base_path, 'results', self.exp_name, 'metadata.db') - meta = MetadataTableRegistry() - with sqlite3.connect(res_path) as con: + table_path = os.path.join(self.results_path, 'metadata.db') + + with sqlite3.connect(table_path) as con: cur = con.cursor() parts = meta.get_parts(cur) resloved_ver = self._resolve_version(parts, cur, meta) @@ -97,7 +98,7 @@ def _run_local(self, c: LocalRunConfig) -> None: def _run_single(self, r: RunSpec) -> None: - subprocess.run(['python', self.entry, '--part', r.part_name, '--config-id', str(r.config_id), '--seed', str(r.seed), '--version', str(r.version)]) + subprocess.run(['python', self.entry, '--part', r.part_name, '--config-id', str(r.config_id), '--seed', str(r.seed), '--version', str(r.version), '--results-path', self.results_path]) def _resolve_version( From 9f5fbc9021d2fa3b922841929a04b2195fdd272d Mon Sep 17 00:00:00 2001 From: Niko Yasui Date: Thu, 31 Oct 2024 16:10:24 -0600 Subject: [PATCH 2/4] test(acceptance): simplify tests due to Scheduler passing results to experiment file --- tests/acceptance/my_experiment.py | 2 +- tests/acceptance/test_softmaxAC_mc.py | 20 ++------------------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/tests/acceptance/my_experiment.py b/tests/acceptance/my_experiment.py index d956dc1..1d863f0 100644 --- a/tests/acceptance/my_experiment.py +++ b/tests/acceptance/my_experiment.py @@ -9,7 +9,7 @@ parser.add_argument("--config-id", type=int, required=True) parser.add_argument("--seed", type=int, required=True) parser.add_argument("--version", type=int, required=True) -parser.add_argument("--results_path", type=str, required=True) +parser.add_argument("--results-path", type=str, required=True) class SoftmaxAC: def __init__( diff --git a/tests/acceptance/test_softmaxAC_mc.py b/tests/acceptance/test_softmaxAC_mc.py index 37eebc7..e1f6184 100644 --- a/tests/acceptance/test_softmaxAC_mc.py +++ b/tests/acceptance/test_softmaxAC_mc.py @@ -1,10 +1,9 @@ import os import pytest -import subprocess from ml_experiment.ExperimentDefinition import ExperimentDefinition from ml_experiment.DefinitionPart import DefinitionPart -from ml_experiment.Scheduler import LocalRunConfig, RunSpec, Scheduler +from ml_experiment.Scheduler import LocalRunConfig, Scheduler @pytest.fixture @@ -34,20 +33,6 @@ def write_database(tmp_path, alphas: list[float], taus: list[float]): return softmaxAC - -# overwrite the run_single function -class StubScheduler(Scheduler): - - # allows us to force the results path to be in a specific spot - def __init__(self, results_path: str, *args, **kwargs): - super().__init__(*args, **kwargs) - self.results_path = results_path - - # adding the results path to the command - def _run_single(self: Scheduler, r: RunSpec) -> None: - subprocess.run(['python', self.entry, '--part', r.part_name, '--config-id', str(r.config_id), '--seed', str(r.seed), '--version', str(r.version), '--results_path', self.results_path]) - - def test_read_database(tmp_path, base_path): """ Test that we can retrieve the configurations from the experiment definition. @@ -145,12 +130,11 @@ def test_run_tasks(tmp_path): run_conf = LocalRunConfig(tasks_in_parallel=ntasks, log_path=".logs/") # set up scheduler - sched = StubScheduler( + sched = Scheduler( exp_name=exp_name, entry=experiment_file_name, seeds=[seed_num], version=version_num, - results_path=results_path, base = str(tmp_path), ) From 051bc1f8497830046e25a37aa6783cae12b383ea Mon Sep 17 00:00:00 2001 From: Niko Yasui Date: Thu, 31 Oct 2024 16:14:06 -0600 Subject: [PATCH 3/4] fix: allow ml_experiment.metadata to be importable --- ml_experiment/metadata/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 ml_experiment/metadata/__init__.py diff --git a/ml_experiment/metadata/__init__.py b/ml_experiment/metadata/__init__.py new file mode 100644 index 0000000..e69de29 From cffc428157993d726a5d553f485e3b9bafd57e33 Mon Sep 17 00:00:00 2001 From: Niko Yasui Date: Thu, 31 Oct 2024 16:21:31 -0600 Subject: [PATCH 4/4] build(packages): add submodules to package find --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d2c6248..de6775f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [tool] -[tool.setuptools] -packages = ['ml_experiment'] +[tool.setuptools.packages.find] +where = ["."] # list of folders that contain the packages (["."] by default) +include = ["ml_experiment*"] # package names should match these glob patterns (["*"] by default) [tool.commitizen] name = "cz_conventional_commits"