Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature restart #5

Merged
merged 28 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5dcbb1e
Added support for restarting from pre-existing checkpoint files.
mb2055 Nov 3, 2023
35ccd32
Added test for the restart feature.
mb2055 Nov 3, 2023
16f7d86
Added config filename to filename generator
mb2055 Nov 3, 2023
c8e9f52
Fixed config writing
mb2055 Nov 3, 2023
bb9606d
Merge branch 'main' into feature_restart
mb2055 Nov 6, 2023
9e66360
Remove duplicates from list of files to be deleted when --no-restart …
mb2055 Nov 6, 2023
7965730
Simulations will now start from scratch if restart==True but no check…
mb2055 Nov 6, 2023
b85be1b
Slight change to setting of charge scaled morph
mb2055 Nov 6, 2023
485ae5a
Added thorough testing for restarting, verifying that all options tha…
mb2055 Nov 6, 2023
d4faed6
Changed default behaviour to restart=False, warnings regarding overwr…
mb2055 Nov 6, 2023
257a99c
Re-factor of config to allow for the logger to be used within config …
mb2055 Nov 7, 2023
8b40942
Config now encoded in sire checkpoint files
mb2055 Nov 7, 2023
595d86b
Removing changed the writing of the output directory from PosixPath t…
mb2055 Nov 7, 2023
0d73e87
platform not os.platform
mb2055 Nov 7, 2023
845db4d
Explicitly delete objects before tempfile cleanup to solve windows is…
mb2055 Nov 8, 2023
88e7c8d
Disable logfile check on windows
mb2055 Nov 8, 2023
8561f5c
Remove wrongly added test, actually disable windows on logfile test
mb2055 Nov 8, 2023
499c529
Switch logfile test back on and add explicit logger deletion to confi…
mb2055 Nov 8, 2023
08dec8f
Removed second half of logfile creation test
mb2055 Nov 8, 2023
471ee73
returned windows skip to test for now
mb2055 Nov 8, 2023
ef71177
Proper naming for windows platform
mb2055 Nov 8, 2023
4b59cbb
another attempt at fixing logging on windows
mb2055 Nov 8, 2023
c794e7b
Added checks for consistency when restarting from checkpoint
mb2055 Nov 9, 2023
fe081d5
Added lambda value to properties [ci skip]
mb2055 Nov 9, 2023
7873aed
Several changes to bring the code in line with @lohedges suggestions.
mb2055 Nov 14, 2023
a832c92
Removed mention of directory_existed from _config.py - there is no ne…
mb2055 Nov 14, 2023
e7b82c3
Adds a systems_are_same function to the runner that checks uids, numb…
mb2055 Nov 14, 2023
71ee5df
Made systems_are_same private.
mb2055 Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 70 additions & 12 deletions src/somd2/config/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class Config:

def __init__(
self,
log_level="info",
log_file=None,
runtime="1ns",
timestep="4fs",
temperature="300K",
Expand Down Expand Up @@ -96,9 +98,9 @@ def __init__(
max_gpus=None,
run_parallel=True,
output_directory="output",
restart=False,
write_config=True,
log_level="info",
log_file=None,
supress_overwrite_warning=False,
):
"""
Constructor.
Expand Down Expand Up @@ -195,6 +197,9 @@ def __init__(
output_directory: str
Path to a directory to store output files.

restart: bool
Whether to restart from a previous simulation, using the files found in `output-directory`.

write_config: bool
Whether to write the configuration options to a YAML file in the output directory.

Expand All @@ -203,8 +208,16 @@ def __init__(

log_file: str
Name of log file, will be saved in output directory.

supress_overwrite_warning: bool
Whether to supress the warning when overwriting files in the output directory.
"""

# Setup logger before doing anything else
self.log_level = log_level
self.log_file = log_file
self.output_directory = output_directory

self.runtime = runtime
self.temperature = temperature
self.pressure = pressure
Expand Down Expand Up @@ -232,10 +245,11 @@ def __init__(
self.max_threads = max_threads
self.max_gpus = max_gpus
self.run_parallel = run_parallel
self.output_directory = output_directory
self.restart = restart

self.write_config = write_config
self.log_level = log_level
self.log_file = log_file

self.supress_overwrite_warning = supress_overwrite_warning

def __str__(self):
"""Return a string representation of this object."""
Expand Down Expand Up @@ -268,6 +282,10 @@ def __eq__(self, other):
"""Equality operator."""
return self.as_dict() == other.as_dict()

def __del__(self):
    """Destructor.

    Detaches all handlers from the module-level logger so that any
    open log file is flushed and released when this Config object is
    garbage collected.
    """
    # NOTE(review): _logger is module-level shared state, so this removes
    # every handler for all users of the logger, not just the ones added
    # by this instance — presumably intended to release the log file
    # handle (e.g. on Windows); confirm.
    _logger.remove()

@staticmethod
def from_yaml(path):
"""
Expand All @@ -286,21 +304,33 @@ def from_yaml(path):

return Config(**d)

def as_dict(self):
"""Convert config object to dictionary"""
from pathlib import PosixPath as _PosixPath
def as_dict(self, sire_compatible=False):
"""Convert config object to dictionary

Parameters
----------
sire_compatible: bool
Whether to convert to a dictionary compatible with Sire,
this simply converts any options with a value of None to a
boolean with the value False.
"""
from pathlib import Path as _Path
from sire.cas import LambdaSchedule as _LambdaSchedule

d = {}
for attr, value in self.__dict__.items():
if attr.startswith("_extra") or attr.startswith("extra"):
continue
attr_l = attr[1:]
if isinstance(value, _PosixPath):
if isinstance(value, _Path):
d[attr_l] = str(value)
else:
try:
d[attr_l] = value.to_string()
except AttributeError:
d[attr_l] = value
if value is None and sire_compatible:
d[attr_l] = False

# Handle the lambda schedule separately so that we can use simplified
# keyword options.
Expand Down Expand Up @@ -485,9 +515,7 @@ def lambda_schedule(self, lambda_schedule):
if lambda_schedule == "standard_morph":
self._lambda_schedule = _LambdaSchedule.standard_morph()
elif lambda_schedule == "charge_scaled_morph":
self._lambda_schedule = _LambdaSchedule.charge_scaled_morph(
self._charge_scale_factor
)
self._lambda_schedule = _LambdaSchedule.charge_scaled_morph(0.2)
else:
self._lambda_schedule = lambda_schedule
else:
Expand Down Expand Up @@ -867,6 +895,16 @@ def run_parallel(self, run_parallel):
raise ValueError("'run_parallel' must be of type 'bool'")
self._run_parallel = run_parallel

@property
def restart(self):
    """Whether the simulation restarts from files found in the output directory."""
    return self._restart

@restart.setter
def restart(self, restart):
    # Accept only a genuine bool; anything else is a configuration error.
    if isinstance(restart, bool):
        self._restart = restart
    else:
        raise ValueError("'restart' must be of type 'bool'")

@property
def output_directory(self):
return self._output_directory
Expand All @@ -885,6 +923,10 @@ def output_directory(self, output_directory):
raise ValueError(
f"Output directory {output_directory} does not exist and cannot be created"
)
if self.log_file is not None:
# Can now add the log file
_logger.add(output_directory / self.log_file, level=self.log_level.upper())
_logger.debug(f"Logging to {output_directory / self.log_file}")
self._output_directory = output_directory

@property
Expand All @@ -910,6 +952,11 @@ def log_level(self, log_level):
raise ValueError(
f"Log level not recognised. Valid log levels are: {', '.join(self._choices['log_level'])}"
)
# Do logging setup here for use in the rest of the config and all other modules.
import sys

_logger.remove()
_logger.add(sys.stderr, level=log_level.upper(), enqueue=True)
self._log_level = log_level

@property
Expand All @@ -920,8 +967,19 @@ def log_file(self):
def log_file(self, log_file):
    """Setter for the log file name.

    Only validates and stores the name here: the file cannot be attached
    to the logger yet because the output directory may not be known at
    this point (see the output_directory setter).
    """
    valid = log_file is None or isinstance(log_file, str)
    if not valid:
        raise TypeError("'log_file' must be of type 'str'")
    self._log_file = log_file

@property
def supress_overwrite_warning(self):
    """Whether warnings about overwriting files in the output directory are silenced."""
    # Note: "supress" (sic) is the established public spelling of this
    # option; it is kept as-is for backwards compatibility.
    return self._supress_overwrite_warning

@supress_overwrite_warning.setter
def supress_overwrite_warning(self, supress_overwrite_warning):
    if isinstance(supress_overwrite_warning, bool):
        self._supress_overwrite_warning = supress_overwrite_warning
    else:
        raise ValueError("'supress_overwrite_warning' must be of type 'bool'")

@classmethod
def _create_parser(cls):
"""
Expand Down
82 changes: 65 additions & 17 deletions src/somd2/runner/_dynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@
#####################################################################

__all__ = ["Dynamics"]

import platform as _platform
from pathlib import Path as _Path

from ..config import Config as _Config
from ..io import dataframe_to_parquet as _dataframe_to_parquet
from ..io import parquet_append as _parquet_append

from somd2 import _logger
import platform as _platform

from ._runner import _lam_sym


class Dynamics:
Expand Down Expand Up @@ -88,11 +91,51 @@ def __init__(
raise TypeError("config must be a Config object")

self._config = config
# If restarting, subtract the time already run from the total runtime
if self._config.restart:
self._config.runtime = str(self._config.runtime - self._system.time())
self._lambda_val = lambda_val
self._lambda_array = lambda_array
self._increment = increment
self._device = device
self._has_space = has_space
self._filenames = self.create_filenames(
self._lambda_array,
self._lambda_val,
self._config.output_directory,
self._config.restart,
)

@staticmethod
def create_filenames(lambda_array, lambda_value, output_directory, restart=False):
# Create incremental file - used for writing trajectory files
def increment_filename(base_filename, suffix):
file_number = 0
file_path = _Path(output_directory)
while True:
filename = (
f"{base_filename}_{file_number}.{suffix}"
if file_number > 0
else f"{base_filename}.{suffix}"
)
full_path = file_path / filename
if not full_path.exists():
return filename
file_number += 1

if lambda_value not in lambda_array:
raise ValueError("lambda_value not in lambda_array")
filenames = {}
index = lambda_array.index(lambda_value)
filenames["checkpoint"] = f"checkpoint_{index}.s3"
filenames["energy_traj"] = f"energy_traj_{index}.parquet"
if restart:
filenames["trajectory"] = increment_filename(f"traj_{index}", "dcd")
filenames["config"] = increment_filename("config", "yaml")
else:
filenames["trajectory"] = f"traj_{index}.dcd"
filenames["config"] = "config.yaml"
return filenames

def _setup_dynamics(self, equilibration=False):
"""
Expand All @@ -116,7 +159,7 @@ def _setup_dynamics(self, equilibration=False):
pressure = None

try:
map = self._config.extra_args
map = self._config._extra_args
except:
map = None

Expand Down Expand Up @@ -151,30 +194,30 @@ def _minimisation(self, lambda_min=None):
lambda_val.
"""
if lambda_min is None:
_logger.info(f"Minimising at λ = {self._lambda_val}")
_logger.info(f"Minimising at {_lam_sym} = {self._lambda_val}")
try:
m = self._system.minimisation(
cutoff_type=self._config.cutoff_type,
schedule=self._config.lambda_schedule,
lambda_value=self._lambda_val,
platform=self._config.platform,
vacuum=not self._has_space,
map=self._config.extra_args,
map=self._config._extra_args,
)
m.run()
self._system = m.commit()
except:
raise
else:
_logger.info(f"Minimising at λ = {lambda_min}")
_logger.info(f"Minimising at {_lam_sym} = {lambda_min}")
try:
m = self._system.minimisation(
cutoff_type=self._config.cutoff_type,
schedule=self._config.lambda_schedule,
lambda_value=lambda_min,
platform=self._config.platform,
vacuum=not self._has_space,
map=self._config.extra_args,
map=self._config._extra_args,
)
m.run()
self._system = m.commit()
Expand All @@ -189,7 +232,7 @@ def _equilibration(self):
Currently just runs dynamics without any saving
"""

_logger.info(f"Equilibrating at λ = {self._lambda_val}")
_logger.info(f"Equilibrating at {_lam_sym} = {self._lambda_val}")
self._setup_dynamics(equilibration=True)
self._dyn.run(
self._config.equilibration_time,
Expand Down Expand Up @@ -242,7 +285,7 @@ def generate_lam_vals(lambda_base, increment):
else:
lam_arr = self._lambda_array + self._lambda_grad

_logger.info(f"Running dynamics at λ = {self._lambda_val}")
_logger.info(f"Running dynamics at {_lam_sym} = {self._lambda_val}")

if self._config.checkpoint_frequency.value() > 0.0:
### Calc number of blocks and remainder (surely there's a better way?)###
Expand All @@ -258,9 +301,8 @@ def generate_lam_vals(lambda_base, increment):
energy_per_block = (
self._config.checkpoint_frequency / self._config.energy_frequency
)
sire_checkpoint_name = (
_Path(self._config.output_directory)
/ f"checkpoint_{self._lambda_array.index(self._lambda_val)}.s3"
sire_checkpoint_name = str(
_Path(self._config.output_directory) / self._filenames["checkpoint"]
)
# Run num_blocks dynamics and then run a final block if rem > 0
for x in range(int(num_blocks)):
Expand Down Expand Up @@ -291,15 +333,22 @@ def generate_lam_vals(lambda_base, increment):
"temperature": str(self._config.temperature.value()),
},
filepath=self._config.output_directory,
filename=f"energy_traj_{self._lambda_array.index(self._lambda_val)}.parquet",
filename=self._filenames["energy_traj"],
)
# Also want to add the simulation config to the
# system properties once a block has been successfully run.
self._system.set_property(
"config", self._config.as_dict(sire_compatible=True)
)
# Finally, encode lambda value in to properties.
self._system.set_property("lambda", self._lambda_val)
else:
_parquet_append(
f,
df.iloc[-int(energy_per_block) :],
)
_logger.info(
f"Finished block {x+1} of {num_blocks} for λ = {self._lambda_val}"
f"Finished block {x+1} of {num_blocks} for {_lam_sym} = {self._lambda_val}"
)
except:
raise
Expand Down Expand Up @@ -332,15 +381,14 @@ def generate_lam_vals(lambda_base, increment):
self._system = self._dyn.commit()

if self._config.save_trajectories:
traj_filename = (
self._config.output_directory
/ f"traj_{self._lambda_array.index(self._lambda_val)}.dcd"
traj_filename = str(
self._config.output_directory / self._filenames["trajectory"]
)
from sire import save as _save

_save(self._system.trajectory(), traj_filename, format=["DCD"])
# dump final system to checkpoint file
_stream.save(self._system, str(sire_checkpoint_name))
_stream.save(self._system, sire_checkpoint_name)
df = self._system.energy_trajectory(to_alchemlyb=True)
return df

Expand Down
Loading
Loading