Skip to content

Commit

Permalink
Add descriptors (#163)
Browse files Browse the repository at this point in the history
* Add MLIP descriptors
---------

Co-authored-by: Jacob Wilkins <[email protected]>
  • Loading branch information
ElliottKasoar and oerc0122 authored May 30, 2024
1 parent 91775dd commit d084433
Show file tree
Hide file tree
Showing 7 changed files with 570 additions and 0 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,27 @@ Foundational models can also be fine-tuned, by including the `foundation_model`
janus train --mlip-config /path/to/fine/tuning/config.yml --fine-tune
```


### Calculate MACE descriptors

MACE descriptors can be calculated for structures (using the [MACE-MP](https://github.com/ACEsuit/mace-mp) "small" force-field):

```shell
janus descriptors --struct tests/data/NaCl.cif --arch mace_mp --calc-kwargs "{'model' : 'small'}"
```

This will calculate the mean descriptor for this structure and save this as attached information (`descriptor`) in `NaCl-descriptors.xyz`, in addition to generating a log file, `descriptors.log`, and a summary of inputs, `descriptors_summary.yml`.

The mean descriptor per element can also be calculated, and all descriptors, rather than only the invariant part, can be used when calculating the means:

```shell
janus descriptors --struct tests/data/NaCl.cif --no-invariants-only --calc-per-element
```

This will generate the same output files, but additional labels (`Cl_descriptor` and `Na_descriptor`) will be saved in `NaCl-descriptors.xyz`.

For all options, run `janus descriptors --help`.

## License

[BSD 3-Clause License](LICENSE)
Expand Down
20 changes: 20 additions & 0 deletions docs/source/apidoc/janus_core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ janus\_core.cli.eos module
:undoc-members:
:show-inheritance:

janus\_core.cli.descriptors module
----------------------------------

.. automodule:: janus_core.cli.descriptors
:members:
:special-members:
:private-members:
:undoc-members:
:show-inheritance:

janus\_core.cli.types module
----------------------------

Expand Down Expand Up @@ -144,6 +154,16 @@ janus\_core.helpers.mlip\_calculators module
:undoc-members:
:show-inheritance:

janus\_core.helpers.descriptors module
--------------------------------------

.. automodule:: janus_core.helpers.descriptors
:members:
:special-members:
:private-members:
:undoc-members:
:show-inheritance:

janus\_core.helpers.train module
--------------------------------

Expand Down
157 changes: 157 additions & 0 deletions janus_core/cli/descriptors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""Set up MLIP descriptors commandline interface."""

from pathlib import Path
from typing import Annotated

from typer import Context, Option, Typer
from typer_config import use_config

from janus_core.calculations.single_point import SinglePoint
from janus_core.cli.types import (
Architecture,
CalcKwargs,
Device,
LogPath,
ReadKwargs,
StructPath,
Summary,
WriteKwargs,
)
from janus_core.cli.utils import (
check_config,
end_summary,
parse_typer_dicts,
save_struct_calc,
start_summary,
yaml_converter_callback,
)
from janus_core.helpers.descriptors import calc_descriptors
from janus_core.helpers.utils import dict_paths_to_strs

# Typer application on which the `descriptors` command below is registered
app = Typer()


@app.command(help="Calculate MLIP descriptors.")
@use_config(yaml_converter_callback)
def descriptors(
    # pylint: disable=too-many-arguments,too-many-locals,duplicate-code
    # numpydoc ignore=PR02
    ctx: Context,
    struct: StructPath,
    invariants_only: Annotated[
        bool,
        Option(help="Only calculate invariant descriptors."),
    ] = True,
    calc_per_element: Annotated[
        bool,
        Option(help="Calculate mean descriptors for each element."),
    ] = False,
    arch: Architecture = "mace_mp",
    device: Device = "cpu",
    out: Annotated[
        Path,
        Option(
            help=(
                "Path to save structure with calculated descriptors. Default is "
                "inferred from name of structure file."
            ),
        ),
    ] = None,
    read_kwargs: ReadKwargs = None,
    calc_kwargs: CalcKwargs = None,
    write_kwargs: WriteKwargs = None,
    log: LogPath = "descriptors.log",
    summary: Summary = "descriptors_summary.yml",
):
    """
    Calculate MLIP descriptors for the given structure(s).

    Parameters
    ----------
    ctx : Context
        Typer (Click) Context. Automatically set.
    struct : Path
        Path of structure to calculate descriptors for.
    invariants_only : bool
        Whether only the invariant descriptors should be returned. Default is True.
    calc_per_element : bool
        Whether to calculate mean descriptors for each element. Default is False.
    arch : Optional[str]
        MLIP architecture to use for single point calculations.
        Default is "mace_mp".
    device : Optional[str]
        Device to run model on. Default is "cpu".
    out : Optional[Path]
        Path to save structure with calculated descriptors. Default is inferred
        from name of the structure file.
    read_kwargs : Optional[dict[str, Any]]
        Keyword arguments to pass to ase.io.read. Default is {}.
    calc_kwargs : Optional[dict[str, Any]]
        Keyword arguments to pass to the selected calculator. Default is {}.
    write_kwargs : Optional[dict[str, Any]]
        Keyword arguments to pass to ase.io.write when saving results. Must not
        contain "filename"; use `out` instead. Default is {}.
    log : Optional[Path]
        Path to write logs to. Default is "descriptors.log".
    summary : Path
        Path to save summary of inputs and start/end time. Default is
        descriptors_summary.yml.
    config : Path
        Path to yaml configuration file to define the above options. Default is None.

    Raises
    ------
    ValueError
        If "filename" is passed through `write_kwargs` rather than `out`.
    """
    # Check options from configuration file are all valid
    check_config(ctx)

    [read_kwargs, calc_kwargs, write_kwargs] = parse_typer_dicts(
        [read_kwargs, calc_kwargs, write_kwargs]
    )

    # Check output structure path is not duplicated. This is done first so that
    # an invalid option is reported before `SinglePoint` is constructed.
    if "filename" in write_kwargs:
        raise ValueError("'filename' must be passed through the --out option")

    # Set up single point calculator
    s_point = SinglePoint(
        struct_path=struct,
        architecture=arch,
        device=device,
        read_kwargs=read_kwargs,
        calc_kwargs=calc_kwargs,
        log_kwargs={"filename": log, "filemode": "w"},
    )

    # Set default filename for writing structure with descriptors if not specified
    if out:
        write_kwargs["filename"] = out
    else:
        write_kwargs["filename"] = f"{s_point.struct_name}-descriptors.xyz"

    # Dictionary of inputs for calc_descriptors function
    descriptors_kwargs = {
        "struct": s_point.struct,
        "invariants_only": invariants_only,
        "calc_per_element": calc_per_element,
        "write_results": True,
        "write_kwargs": write_kwargs,
        # Append, as the log file was already opened in "w" mode for SinglePoint
        "log_kwargs": {"filename": log, "filemode": "a"},
    }

    # Store inputs for yaml summary
    inputs = descriptors_kwargs.copy()

    # Store only filename as filemode is not set by user
    del inputs["log_kwargs"]
    inputs["log"] = log

    save_struct_calc(inputs, s_point, arch, device, read_kwargs, calc_kwargs)

    # Convert all paths to strings in inputs nested dictionary
    dict_paths_to_strs(inputs)

    # Save summary information before the descriptors calculation begins
    start_summary(command="descriptors", summary=summary, inputs=inputs)

    # Calculate descriptors and save output structure
    calc_descriptors(**descriptors_kwargs)

    # Time after the descriptors calculation has finished
    end_summary(summary)
2 changes: 2 additions & 0 deletions janus_core/cli/janus.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typer import Exit, Option, Typer

from janus_core import __version__
from janus_core.cli.descriptors import descriptors
from janus_core.cli.eos import eos
from janus_core.cli.geomopt import geomopt
from janus_core.cli.md import md
Expand All @@ -17,6 +18,7 @@
app.command()(md)
app.command()(phonons)
app.command()(eos)
app.command()(descriptors)

# Train not implemented in older versions of MACE
try:
Expand Down
143 changes: 143 additions & 0 deletions janus_core/helpers/descriptors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Calculate MLIP descriptors for structures."""

from collections.abc import Sequence
from logging import Logger
from pathlib import Path
from typing import Any, Optional

from ase import Atoms
from ase.io import write
import numpy as np

from janus_core.helpers.janus_types import ASEWriteArgs, MaybeSequence
from janus_core.helpers.log import config_logger
from janus_core.helpers.utils import none_to_dict


def calc_descriptors(
    struct: MaybeSequence[Atoms],
    struct_name: Optional[str] = None,
    invariants_only: bool = True,
    calc_per_element: bool = False,
    write_results: bool = False,
    write_kwargs: Optional[ASEWriteArgs] = None,
    log_kwargs: Optional[dict[str, Any]] = None,
) -> MaybeSequence[Atoms]:
    """
    Prepare and call calculation of MLIP descriptors for the given structure(s).

    Parameters
    ----------
    struct : MaybeSequence[Atoms]
        Structure(s) to calculate descriptors for. Each structure must have a
        calculator attached.
    struct_name : Optional[str]
        Name of structure, used to build the default output filename. Default is
        None, in which case the chemical formula of the (first) structure is used.
    invariants_only : bool
        Whether only the invariant descriptors should be returned. Default is True.
    calc_per_element : bool
        Whether to calculate mean descriptors for each element. Default is False.
    write_results : bool
        True to write out structure with results of calculations. Default is False.
    write_kwargs : Optional[ASEWriteArgs],
        Keyword arguments to pass to ase.io.write if saving structure with
        results of calculations. "filename" defaults to
        "<struct_name>-descriptors.xyz" in the current directory, and
        "write_info" is always set to True. Default is {}.
    log_kwargs : Optional[dict[str, Any]]
        Keyword arguments to pass to `config_logger`. Default is {}.

    Returns
    -------
    MaybeSequence[Atoms]
        Atoms object(s) with descriptors attached as info.

    Raises
    ------
    ValueError
        If any structure has no calculator attached, or if results are to be
        written but no filename can be inferred because `struct` is empty.
    """
    [write_kwargs, log_kwargs] = none_to_dict([write_kwargs, log_kwargs])

    # Work on shallow copies so that defaults set below do not leak into the
    # dictionaries owned by the caller
    write_kwargs = dict(write_kwargs)
    log_kwargs = dict(log_kwargs)

    log_kwargs.setdefault("name", __name__)
    logger = config_logger(**log_kwargs)

    # Uniform view over one or many structures; `struct` itself is returned as-is
    is_sequence = isinstance(struct, Sequence)
    images = struct if is_sequence else [struct]

    # Set default name for output file, only if a file is going to be written
    if write_results and "filename" not in write_kwargs:
        if not struct_name:
            if not images:
                raise ValueError(
                    "Cannot infer an output filename from an empty `struct`. "
                    "Please set `struct_name` or `write_kwargs['filename']`."
                )
            struct_name = images[0].get_chemical_formula()
        write_kwargs["filename"] = Path(
            f"./{struct_name}-descriptors.xyz"
        ).absolute()

    if any(not image.calc for image in images):
        if is_sequence:
            raise ValueError("Please attach a calculator to all images in `struct`.")
        raise ValueError("Please attach a calculator to `struct`.")

    if logger:
        logger.info("Starting descriptors calculation")

    # Descriptors are attached to each structure in place, so the returned
    # object does not need to be rebound
    for image in images:
        _calc_descriptors(
            image,
            invariants_only=invariants_only,
            calc_per_element=calc_per_element,
            logger=logger,
        )

    if logger:
        logger.info("Descriptors calculation complete")

    if write_results:
        # Descriptors are stored in `Atoms.info`, so info must always be written.
        # Merging into the dict avoids a duplicate keyword error if the caller
        # also supplied "write_info".
        write(images=struct, **{**write_kwargs, "write_info": True})

    return struct


def _calc_descriptors(
    struct: Atoms,
    invariants_only: bool = True,
    calc_per_element: bool = False,
    logger: Optional[Logger] = None,
) -> Atoms:
    """
    Calculate MLIP descriptors for the given structure.

    The mean over all descriptor values is stored in ``struct.info["descriptor"]``.
    If requested, the mean over the descriptors of each element is also stored in
    ``struct.info["<element>_descriptor"]``.

    Parameters
    ----------
    struct : Atoms
        Structure to calculate descriptors for. Must have a calculator attached
        that provides `get_descriptors`.
    invariants_only : bool
        Whether only the invariant descriptors should be returned. Default is True.
    calc_per_element : bool
        Whether to calculate mean descriptors for each element. Default is False.
    logger : Optional[Logger]
        Logger if log file has been specified.

    Returns
    -------
    Atoms
        The same Atoms object, modified in place, with descriptors attached as info.
    """
    if logger:
        logger.info("invariants_only: %s", invariants_only)
        logger.info("calc_per_element: %s", calc_per_element)

    # Calculate descriptors and save the mean over all values as a single number
    descriptors = struct.calc.get_descriptors(struct, invariants_only=invariants_only)
    struct.info["descriptor"] = np.mean(descriptors)

    if calc_per_element:
        symbols = struct.get_chemical_symbols()
        symbols_array = np.array(symbols)
        # Sorted so that the order of the info keys is reproducible between runs
        for element in sorted(set(symbols)):
            # Boolean mask selects the descriptor rows for atoms of this element
            mask = symbols_array == element
            struct.info[f"{element}_descriptor"] = np.mean(descriptors[mask, :])

    return struct
Loading

0 comments on commit d084433

Please sign in to comment.