Skip to content

Commit

Permalink
Merge branch 'materialsproject:main' into mlff_md
Browse files Browse the repository at this point in the history
  • Loading branch information
esoteric-ephemera authored Apr 3, 2024
2 parents 4a88d7b + b55cf83 commit 78ad999
Show file tree
Hide file tree
Showing 109 changed files with 1,921 additions and 4 deletions.
3 changes: 1 addition & 2 deletions src/atomate2/cp2k/drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def assimilate(self, path: str | Path | None = None) -> TaskDocument:
TaskDocument
A CP2K task document.
"""
if path is None:
path = Path.cwd()
path = path or Path.cwd()

try:
doc = TaskDocument.from_directory(path, **self.task_document_kwargs)
Expand Down
1 change: 1 addition & 0 deletions src/atomate2/qchem/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Module for QChem workflows."""
84 changes: 84 additions & 0 deletions src/atomate2/qchem/drones.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Drones for parsing VASP calculations and realtd outputs."""

from __future__ import annotations

import logging
import os
from pathlib import Path

from emmet.core.qc_tasks import TaskDoc
from pymatgen.apps.borg.hive import AbstractDrone

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class QChemDrone(AbstractDrone):
    """
    A QChem drone to parse QChem outputs.

    Parameters
    ----------
    **task_document_kwargs
        Additional keyword args passed to :obj:`.TaskDoc.from_directory`.
    """

    def __init__(self, **task_document_kwargs) -> None:
        self.task_document_kwargs = task_document_kwargs

    def assimilate(self, path: str | Path | None = None) -> TaskDoc:
        """
        Parse QChem output files and return the output document.

        Parameters
        ----------
        path : str or Path or None
            Path to the directory containing mol.qout and other output files. If
            ``None`` (or any other falsy value), the current working directory is
            used.

        Returns
        -------
        TaskDoc
            A QChem task document.

        Raises
        ------
        Exception
            Any exception raised by ``TaskDoc.from_directory`` is logged and then
            re-raised unchanged.
        """
        path = path or Path.cwd()
        try:
            doc = TaskDoc.from_directory(path, **self.task_document_kwargs)
        except Exception:
            # ``logger.exception`` appends the active traceback on its own, so the
            # message only needs to identify the failing directory.
            logger.exception("Error in %s", Path(path).absolute())
            raise
        return doc

    def get_valid_paths(self, path: tuple[str, list[str], list[str]]) -> list[str]:
        """
        Get valid paths to assimilate.

        ``parent`` is considered valid when at least one of its immediate
        sub-directories holds a file whose name starts with ``mol.qout.``, or when
        ``parent`` itself holds a file matching ``mol.qout*``. It is returned at
        most once, however many of these conditions hold.

        Parameters
        ----------
        path : tuple of (str, list of str, list of str)
            Input path as a tuple generated from ``os.walk``, i.e., (parent, subdirs,
            files).

        Returns
        -------
        list of str
            ``[parent]`` if the directory should be assimilated, otherwise an empty
            list.
        """
        parent, subdirs, _ = path
        # NOTE(review): this is compared literally against the end of ``parent``
        # (it is not expanded as a glob) -- confirm that is the intent.
        task_names = ["mol.qout.*"]

        subdir_has_outputs = any(
            name.startswith("mol.qout.")
            for sdir in subdirs
            for name in os.listdir(os.path.join(parent, sdir))
        )
        parent_has_outputs = not any(
            parent.endswith(os.sep + r) for r in task_names
        ) and any(Path(parent).glob("mol.qout*"))

        return [parent] if subdir_has_outputs or parent_has_outputs else []
126 changes: 126 additions & 0 deletions src/atomate2/qchem/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""Functions for manipulating QChem files."""

from __future__ import annotations

import logging
import re
from pathlib import Path
from typing import TYPE_CHECKING

from atomate2.common.files import copy_files, get_zfile, gunzip_files, rename_files
from atomate2.utils.file_client import FileClient, auto_fileclient
from atomate2.utils.path import strip_hostname

if TYPE_CHECKING:
from collections.abc import Sequence


# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


@auto_fileclient
def copy_qchem_outputs(
    src_dir: Path | str,
    src_host: str | None = None,
    additional_qchem_files: Sequence[str] = (),
    file_client: FileClient | None = None,
) -> None:
    """
    Copy QChem files from a previous calculation into the current directory.

    The files looked up are ``mol.qin``, ``mol.qout`` and any names given in
    ``additional_qchem_files``. When the source directory contains files with a
    numbered ``.opt_N`` suffix, the largest suffix (as reported by
    :obj:`get_largest_opt_extension`) is appended to every name before lookup.
    The selected files are copied, then gunzipped, and finally renamed so that
    the ``.opt_N`` suffix is removed.

    Parameters
    ----------
    src_dir : str or Path
        The source directory.
    src_host : str or None
        The source hostname used to specify a remote filesystem. Can be given as
        either "username@remote_host" or just "remote_host" in which case the username
        will be inferred from the current user. If ``None``, the local filesystem will
        be used as the source.
    additional_qchem_files : list of str
        Additional files to copy.
    file_client : .FileClient
        A file client to use for performing file operations.
    """
    src_dir = strip_hostname(src_dir)  # TODO: Handle hostnames properly.

    logger.info(f"Copying QChem inputs from {src_dir}")
    suffix = get_largest_opt_extension(src_dir, src_host, file_client=file_client)
    listing = file_client.listdir(src_dir, host=src_host)

    # resolve each wanted file name (with the opt suffix applied) against the listing
    wanted = ["mol.qin", "mol.qout", *additional_qchem_files]
    to_copy = []
    for base_name in wanted:
        to_copy.append(get_zfile(listing, base_name + suffix))

    copy_files(
        src_dir,
        src_host=src_host,
        include_files=to_copy,
        file_client=file_client,
    )
    gunzip_files(include_files=to_copy, allow_missing=True, file_client=file_client)

    if suffix:
        # map each unzipped name to the same name without the opt suffix
        renames = {}
        for copied in to_copy:
            current = copied.name.replace(".gz", "")
            target = copied.name.replace(suffix, "").replace(".gz", "")
            renames[current] = target
        rename_files(renames, allow_missing=True, file_client=file_client)

    logger.info("Finished copying inputs")


@auto_fileclient
def get_largest_opt_extension(
    directory: Path | str,
    host: str | None = None,
    file_client: FileClient | None = None,
) -> str:
    """
    Get the largest numbered opt extension of files in a directory.

    For example, if the directory holds ``mol.qout.opt_0.gz`` and
    ``mol.qout.opt_1.gz``, this function will return ``".opt_1"``.

    Parameters
    ----------
    directory : str or Path
        A directory to search.
    host : str or None
        The hostname used to specify a remote filesystem. Can be given as either
        "username@remote_host" or just "remote_host" in which case the username will be
        inferred from the current user. If ``None``, the local filesystem will be used.
    file_client : .FileClient
        A file client to use for performing file operations.

    Returns
    -------
    str
        The opt extension, or an empty string if no file name in the directory
        carries a numbered ``.opt_N`` suffix.
    """
    candidates = file_client.glob(Path(directory) / "*.opt*", host=host)

    # keep the digit strings of every name that carries a numbered opt suffix
    searches = (re.search(r"\.opt_(\d+)", entry.name) for entry in candidates)
    indices = [found.group(1) for found in searches if found]

    if not indices:
        return ""  # No matches found

    # compare numerically, but keep the digits exactly as they appear in the name
    return ".opt_" + max(indices, key=int)
1 change: 1 addition & 0 deletions src/atomate2/qchem/jobs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Jobs for running QChem calculations."""
145 changes: 145 additions & 0 deletions src/atomate2/qchem/jobs/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""Definition of a base QChem Maker."""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Callable

from emmet.core.qc_tasks import TaskDoc
from jobflow import Maker, Response, job
from monty.serialization import dumpfn
from monty.shutil import gzip_dir
from pymatgen.io.qchem.inputs import QCInput

from atomate2.qchem.files import copy_qchem_outputs
from atomate2.qchem.run import run_qchem, should_stop_children
from atomate2.qchem.sets.base import QCInputGenerator

if TYPE_CHECKING:
from pymatgen.core.structure import Molecule


def qchem_job(method: Callable) -> job:
    """
    Decorate the ``make`` method of QChem job makers.

    This is a thin wrapper around the jobflow ``job`` decorator that applies the
    same settings to every QChem job: ``data=QCInput`` and
    ``output_schema=TaskDoc``, i.e. the job output is declared to be a QChem
    :obj:`.TaskDoc`.

    Any makers that return QChem jobs (not flows) should decorate the ``make``
    method with @qchem_job. For example:

    .. code-block:: python

        class MyQChemMaker(BaseQCMaker):
            @qchem_job
            def make(self, molecule):
                # code to run QChem job.
                pass

    Parameters
    ----------
    method : callable
        A BaseQCMaker.make method. This should not be specified directly and is
        implied by the decorator.

    Returns
    -------
    callable
        A decorated version of the make function that will generate QChem jobs.
    """
    # settings shared by all QChem jobs
    shared_settings = {"data": QCInput, "output_schema": TaskDoc}
    return job(method, **shared_settings)


@dataclass
class BaseQCMaker(Maker):
    """
    Base QChem job maker.

    Parameters
    ----------
    name : str
        The job name.
    input_set_generator : .QCInputGenerator
        A generator used to make the input set.
    write_input_set_kwargs : dict
        Keyword arguments intended for writing the QChem input set. ``make`` sets
        a default ``"from_prev"`` entry on this dict.
    copy_qchem_kwargs : dict
        Keyword arguments that will get passed to :obj:`.copy_qchem_outputs`.
    run_qchem_kwargs : dict
        Keyword arguments that will get passed to :obj:`.run_qchem`.
    task_document_kwargs : dict
        Keyword arguments that will get passed to :obj:`.TaskDoc.from_directory`.
    stop_children_kwargs : dict
        Keyword arguments that will get passed to :obj:`.should_stop_children`.
    write_additional_data : dict
        Additional data to write to the current directory. Given as a dict of
        {filename: data}. Note that if using FireWorks, dictionary keys cannot contain
        the "." character which is typically used to denote file extensions. To avoid
        this, use the ":" character, which will automatically be converted to ".". E.g.
        ``{"my_file:txt": "contents of the file"}``.
    """

    name: str = "base qchem job"
    input_set_generator: QCInputGenerator = field(
        default_factory=lambda: QCInputGenerator(
            job_type="sp", scf_algorithm="diis", basis_set="def2-qzvppd"
        )
    )
    write_input_set_kwargs: dict = field(default_factory=dict)
    copy_qchem_kwargs: dict = field(default_factory=dict)
    run_qchem_kwargs: dict = field(default_factory=dict)
    task_document_kwargs: dict = field(default_factory=dict)
    stop_children_kwargs: dict = field(default_factory=dict)
    write_additional_data: dict = field(default_factory=dict)

    @qchem_job
    def make(
        self, molecule: Molecule, prev_qchem_dir: str | Path | None = None
    ) -> Response:
        """
        Run a QChem calculation in the current working directory.

        Parameters
        ----------
        molecule : Molecule
            A pymatgen molecule object.
        prev_qchem_dir : str or Path or None
            A previous QChem calculation directory to copy output files from.

        Returns
        -------
        Response
            A response whose output is the parsed :obj:`.TaskDoc`, whose stored
            data holds the document's ``custodian`` field, and whose
            ``stop_children`` flag comes from :obj:`.should_stop_children`.
        """
        # bring over the files of a previous calculation, if one was given
        has_prev = prev_qchem_dir is not None
        if has_prev:
            copy_qchem_outputs(prev_qchem_dir, **self.copy_qchem_kwargs)

        # only fills in "from_prev" when the caller has not set it already
        self.write_input_set_kwargs.setdefault("from_prev", has_prev)

        # NOTE(review): the input set is generated but its result is discarded and
        # nothing is written to disk here (a former ``.write_inputs()`` call was
        # disabled) -- confirm the input files are produced elsewhere.
        self.input_set_generator.get_input_set(molecule)

        # dump extra data files; ":" in a key stands in for "." in the file name
        for raw_name, contents in self.write_additional_data.items():
            dumpfn(contents, raw_name.replace(":", "."))

        run_qchem(**self.run_qchem_kwargs)

        # parse whatever the run left in the working directory
        workdir = Path.cwd()
        task_doc = TaskDoc.from_directory(workdir, **self.task_document_kwargs)
        # NOTE(review): the job name is stored as the task *type*, not as a
        # label -- confirm this is intended.
        task_doc.task_type = self.name

        # decide whether child jobs should proceed, before compressing the folder
        halt_children = should_stop_children(task_doc, **self.stop_children_kwargs)
        gzip_dir(".")

        return Response(
            output=task_doc,
            stored_data={"custodian": task_doc.custodian},
            stop_children=halt_children,
        )
Loading

0 comments on commit 78ad999

Please sign in to comment.