Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature restart #5

Merged
merged 28 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5dcbb1e
Added support for restarting from pre-existing checkpoint files.
mb2055 Nov 3, 2023
35ccd32
Added test for the restart feature.
mb2055 Nov 3, 2023
16f7d86
Added config filename to filename generator
mb2055 Nov 3, 2023
c8e9f52
Fixed config writing
mb2055 Nov 3, 2023
bb9606d
Merge branch 'main' into feature_restart
mb2055 Nov 6, 2023
9e66360
Remove duplicates from list of files to be deleted when --no-restart …
mb2055 Nov 6, 2023
7965730
Simulations will now start from scratch if restart==True but no check…
mb2055 Nov 6, 2023
b85be1b
Slight change to setting of charge scaled morph
mb2055 Nov 6, 2023
485ae5a
Added thorough testing for restarting, verifying that all options tha…
mb2055 Nov 6, 2023
d4faed6
Changed default behaviour to restart=False, warnings regarding overwr…
mb2055 Nov 6, 2023
257a99c
Re-factor of config to allow for the logger to be used within config …
mb2055 Nov 7, 2023
8b40942
Config now encoded in sire checkpoint files
mb2055 Nov 7, 2023
595d86b
Removing changed the writing of the output directory from PosixPath t…
mb2055 Nov 7, 2023
0d73e87
platform not os.platform
mb2055 Nov 7, 2023
845db4d
Explicitly delete objects before tempfile cleanup to solve windows is…
mb2055 Nov 8, 2023
88e7c8d
Disable logfile check on windows
mb2055 Nov 8, 2023
8561f5c
Remove wrongly added test, actually disable windows on logfile test
mb2055 Nov 8, 2023
499c529
Switch logfile test back on and add explicit logger deletion to confi…
mb2055 Nov 8, 2023
08dec8f
Removed second half of logfile creation test
mb2055 Nov 8, 2023
471ee73
returned windows skip to test for now
mb2055 Nov 8, 2023
ef71177
Proper naming for windows platform
mb2055 Nov 8, 2023
4b59cbb
another attempt at fixing logging on windows
mb2055 Nov 8, 2023
c794e7b
Added checks for consistency when restarting from checkpoint
mb2055 Nov 9, 2023
fe081d5
Added lambda value to properties [ci skip]
mb2055 Nov 9, 2023
7873aed
Several changes to bring the code in line with @lohedges suggestions.
mb2055 Nov 14, 2023
a832c92
Removed mention of directory_existed from _config.py - there is no ne…
mb2055 Nov 14, 2023
e7b82c3
Adds a systems_are_same function to the runner that checks uids, numb…
mb2055 Nov 14, 2023
71ee5df
Made systems_are_same private.
mb2055 Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/somd2/config/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def __init__(
Path to a directory to store output files.

restart: bool
Whether to restart from a previous simulation - files found in {output-directory}.
Whether to restart from a previous simulation - files found in `output-directory`.

write_config: bool
Whether to write the configuration options to a YAML file in the output directory.
Expand Down Expand Up @@ -319,6 +319,8 @@ def as_dict(self, sire_compatible=False):

d = {}
for attr, value in self.__dict__.items():
if attr.startswith("_extra") or attr.startswith("extra"):
continue
attr_l = attr[1:]
if isinstance(value, _Path):
d[attr_l] = str(value)
Expand Down Expand Up @@ -913,7 +915,6 @@ def output_directory(self):

@output_directory.setter
def output_directory(self, output_directory):
self.cirectory_existed = False
lohedges marked this conversation as resolved.
Show resolved Hide resolved
if not isinstance(output_directory, _Path):
try:
output_directory = _Path(output_directory)
Expand Down
24 changes: 12 additions & 12 deletions src/somd2/runner/_dynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,9 @@
from ..io import parquet_append as _parquet_append

from somd2 import _logger
import platform as _platform

if _platform.system() == "Windows":
lam_sym = "lambda"
else:
lam_sym = "λ"
from ._runner import _lam_sym


class Dynamics:
Expand Down Expand Up @@ -161,7 +159,7 @@ def _setup_dynamics(self, equilibration=False):
pressure = None

try:
map = self._config.extra_args
map = self._config._extra_args
except:
map = None

Expand Down Expand Up @@ -196,30 +194,30 @@ def _minimisation(self, lambda_min=None):
lambda_val.
"""
if lambda_min is None:
_logger.info(f"Minimising at {lam_sym} = {self._lambda_val}")
_logger.info(f"Minimising at {_lam_sym} = {self._lambda_val}")
try:
m = self._system.minimisation(
cutoff_type=self._config.cutoff_type,
schedule=self._config.lambda_schedule,
lambda_value=self._lambda_val,
platform=self._config.platform,
vacuum=not self._has_space,
map=self._config.extra_args,
map=self._config._extra_args,
)
m.run()
self._system = m.commit()
except:
raise
else:
_logger.info(f"Minimising at {lam_sym} = {lambda_min}")
_logger.info(f"Minimising at {_lam_sym} = {lambda_min}")
try:
m = self._system.minimisation(
cutoff_type=self._config.cutoff_type,
schedule=self._config.lambda_schedule,
lambda_value=lambda_min,
platform=self._config.platform,
vacuum=not self._has_space,
map=self._config.extra_args,
map=self._config._extra_args,
)
m.run()
self._system = m.commit()
Expand All @@ -234,7 +232,7 @@ def _equilibration(self):
Currently just runs dynamics without any saving
"""

_logger.info(f"Equilibrating at {lam_sym} = {self._lambda_val}")
_logger.info(f"Equilibrating at {_lam_sym} = {self._lambda_val}")
self._setup_dynamics(equilibration=True)
self._dyn.run(
self._config.equilibration_time,
Expand Down Expand Up @@ -287,7 +285,7 @@ def generate_lam_vals(lambda_base, increment):
else:
lam_arr = self._lambda_array + self._lambda_grad

_logger.info(f"Running dynamics at {lam_sym} = {self._lambda_val}")
_logger.info(f"Running dynamics at {_lam_sym} = {self._lambda_val}")

if self._config.checkpoint_frequency.value() > 0.0:
### Calc number of blocks and remainder (surely there's a better way?)###
Expand Down Expand Up @@ -342,13 +340,15 @@ def generate_lam_vals(lambda_base, increment):
self._system.set_property(
"config", self._config.as_dict(sire_compatible=True)
)
# Finally, encode lambda value in to properties.
self._system.set_property("lambda", self._lambda_val)
else:
_parquet_append(
f,
df.iloc[-int(energy_per_block) :],
)
_logger.info(
f"Finished block {x+1} of {num_blocks} for {lam_sym} = {self._lambda_val}"
f"Finished block {x+1} of {num_blocks} for {_lam_sym} = {self._lambda_val}"
)
except:
raise
Expand Down
43 changes: 21 additions & 22 deletions src/somd2/runner/_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@
from ..io import dict_to_yaml as _dict_to_yaml

from somd2 import _logger
import platform as _platform

if _platform.system() == "Windows":
lam_sym = "lambda"
_lam_sym = "lambda"
else:
lam_sym = "λ"
_lam_sym = "λ"


class Runner:
Expand Down Expand Up @@ -232,7 +233,6 @@ def _compare_configs(config1, config2):
allowed_diffs = [
"runtime",
"restart",
"temperature",
"minimise",
"max_threads",
"equilibration_time",
Expand All @@ -252,7 +252,6 @@ def _compare_configs(config1, config2):
"log_level",
"log_file",
"supress_overwrite_warning",
"xtra_args",
]
for key in config1.keys():
if key not in allowed_diffs:
Expand Down Expand Up @@ -472,7 +471,7 @@ def _initialise_simulation(self, system, lambda_value, device=None):
has_space=self._has_space,
)
except:
_logger.warning(f"System creation at {lam_sym} = {lambda_value} failed")
_logger.warning(f"System creation at {_lam_sym} = {lambda_value} failed")
raise

def _cleanup_simulation(self):
Expand Down Expand Up @@ -512,17 +511,17 @@ def run(self):
threads_per_worker = (
self._config.max_threads // self._config.num_lambda
)
self._config.extra_args = {"threads": threads_per_worker}
self._config._extra_args = {"threads": threads_per_worker}

# (Multi-)GPU platform.
elif self._is_gpu:
self.max_workers = len(self._gpu_pool)
self._config.extra_args = {}
self._config._extra_args = {}

# All other platforms.
else:
self._max_workers = 1
self._config.extra_args = {}
self._config._extra_args = {}

import concurrent.futures as _futures

Expand All @@ -539,7 +538,7 @@ def run(self):
result = False

_logger.error(
f"Exception raised for {lam_sym} = {lambda_value}: {e}"
f"Exception raised for {_lam_sym} = {lambda_value}: {e}"
)
with self._lock:
results.append(result)
Expand Down Expand Up @@ -600,8 +599,8 @@ def _run(sim):
return df, lambda_grad, speed
except Exception as e:
_logger.warning(
f"Minimisation/dynamics at {lam_sym} = {lambda_value} failed with the "
f"following exception {e}, trying again with minimsation at {lam_sym} = 0."
f"Minimisation/dynamics at {_lam_sym} = {lambda_value} failed with the "
f"following exception {e}, trying again with minimsation at {_lam_sym} = 0."
)
try:
df = sim._run(lambda_minimisation=0.0)
Expand All @@ -610,8 +609,8 @@ def _run(sim):
return df, lambda_grad, speed
except Exception as e:
_logger.error(
f"Minimisation/dynamics at {lam_sym} = {lambda_value} failed, even after "
f"minimisation at {lam_sym} = 0. The following warning was raised: {e}."
f"Minimisation/dynamics at {_lam_sym} = {lambda_value} failed, even after "
f"minimisation at {_lam_sym} = 0. The following warning was raised: {e}."
)
raise
else:
Expand All @@ -622,7 +621,7 @@ def _run(sim):
return df, lambda_grad, speed
except Exception as e:
_logger.error(
f"Dynamics at {lam_sym} = {lambda_value} failed. The following warning was "
f"Dynamics at {_lam_sym} = {lambda_value} failed. The following warning was "
f"raised: {e}. This may be due to a lack of minimisation."
)

Expand All @@ -636,7 +635,7 @@ def _run(sim):
).clone()
except:
_logger.warning(
f"Unable to load checkpoint file for {lam_sym}={lambda_value}, starting from scratch."
f"Unable to load checkpoint file for {_lam_sym}={lambda_value}, starting from scratch."
)
else:
try:
Expand All @@ -649,7 +648,7 @@ def _run(sim):
f"last config: {self.last_config}, current config: {cfg_here}"
)
_logger.error(
f"Config for {lam_sym}={lambda_value} does not match previous config."
f"Config for {_lam_sym}={lambda_value} does not match previous config."
)
raise
else:
Expand All @@ -668,12 +667,12 @@ def _run(sim):
acc_time = system.time()
if acc_time > self._config.runtime - self._config.timestep:
_logger.success(
f"{lam_sym} = {lambda_value} already complete. Skipping."
f"{_lam_sym} = {lambda_value} already complete. Skipping."
)
return True
else:
_logger.debug(
f"Restarting {lam_sym} = {lambda_value} at time {acc_time}, time remaining = {self._config.runtime - acc_time}"
f"Restarting {_lam_sym} = {lambda_value} at time {acc_time}, time remaining = {self._config.runtime - acc_time}"
)
# GPU platform.
if self._is_gpu:
Expand All @@ -683,12 +682,12 @@ def _run(sim):
self._remove_gpu_from_pool(gpu_num)
if lambda_value is not None:
_logger.info(
f"Running {lam_sym} = {lambda_value} on GPU {gpu_num}"
f"Running {_lam_sym} = {lambda_value} on GPU {gpu_num}"
)
# Assumes that device for non-parallel GPU jobs is 0
else:
gpu_num = 0
_logger.info("Running {lam_sym} = {lambda_value} on GPU 0")
_logger.info("Running {_lam_sym} = {lambda_value} on GPU 0")
self._initialise_simulation(system, lambda_value, device=gpu_num)
try:
df, lambda_grad, speed = _run(self._sim)
Expand All @@ -705,7 +704,7 @@ def _run(sim):

# All other platforms.
else:
_logger.info(f"Running {lam_sym} = {lambda_value}")
_logger.info(f"Running {_lam_sym} = {lambda_value}")

self._initialise_simulation(system, lambda_value)
try:
Expand All @@ -730,5 +729,5 @@ def _run(sim):
filename=self._fnames[lambda_value]["energy_traj"],
)
del system
_logger.success(f"{lam_sym} = {lambda_value} complete")
_logger.success(f"{_lam_sym} = {lambda_value} complete")
return True
6 changes: 2 additions & 4 deletions tests/runner/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,12 @@ def test_logfile_creation():
mols = sr.load(sr.expand(sr.tutorial_url, "merged_molecule.s3"))
from pathlib import Path

# Instantiate a runner using the default config.
# (All default options, other than platform="cpu".)
# Test that a logfile is created once a config object is initialised
config = Config(output_directory=tmpdir, log_file="test.log")
assert config.log_file is not None
assert Path.exists(config.output_directory / config.log_file)

# Instantiate a runner using the default config.
# (All default options, other than platform="cpu".)
# Test that a logfile is created once a runner object is initialised
runner = Runner(mols, Config(output_directory=tmpdir, log_file="test1.log"))
assert runner._config.log_file is not None
assert Path.exists(runner._config.output_directory / runner._config.log_file)
Expand Down
7 changes: 7 additions & 0 deletions tests/runner/test_restart.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ def test_restart():
with pytest.raises(ValueError):
runner_timestep = Runner(mols, Config(**config_difftimestep))

config_difftemperature = config_new.copy()
config_difftemperature["runtime"] = "36fs"
config_difftemperature["temperature"] = "200K"

with pytest.raises(ValueError):
runner_temperature = Runner(mols, Config(**config_difftemperature))

config_diffscalefactor = config_new.copy()
config_diffscalefactor["runtime"] = "36fs"
config_diffscalefactor["charge_scale_factor"] = 0.5
Expand Down
Loading