CP2K fixes (#4169)

* cp2k DftSet.get_basis_and_potential add keyword cp2k_data_dir which takes precedence over PMG_CP2K_DATA_DIR in pmgrc.yaml * get_basis_and_potential don't raise on potentials specified as string instead Potential instance * get_basis_and_potential if potential_filename or basis_filenames not already set, use the global ones * fix KeyError in calculation_type if self.data.get("dft") returns None * migrate setup_cp2k_data from ruamel.yaml pyyaml to fix AttributeError: "dump()" has been removed * change Keyword + Section verbose default to False to stop printing explanatory comments for sections and flags into every CP2K input file * fix trailing white space in Section._get_str * fix typo
materialsproject · Nov 13, 2024 · 8f24c97 · 8f24c97
1 parent 29b21bf
commit 8f24c97
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 87 deletions.
diff --git a/src/pymatgen/cli/pmg_config.py b/src/pymatgen/cli/pmg_config.py
@@ -13,7 +13,6 @@
 
 from monty.json import jsanitize
 from monty.serialization import dumpfn, loadfn
-from ruamel import yaml
 
 from pymatgen.core import OLD_SETTINGS_FILE, SETTINGS_FILE, Element
 from pymatgen.io.cp2k.inputs import GaussianTypeOrbitalBasisSet, GthPotential
@@ -26,6 +25,10 @@
 
 def setup_cp2k_data(cp2k_data_dirs: list[str]) -> None:
     """Setup CP2K basis and potential data directory."""
+    # this function used to use ruamel.yaml which underwent breaking changes. was easier to
+    # migrate to PyYAML than fix
+    import yaml  # type: ignore[import]
+
     data_dir, target_dir = (os.path.abspath(dirc) for dirc in cp2k_data_dirs)
     try:
         os.mkdir(target_dir)

diff --git a/src/pymatgen/io/cp2k/inputs.py b/src/pymatgen/io/cp2k/inputs.py
@@ -73,7 +73,7 @@ def __init__(
         *values,
         description: str | None = None,
         units: str | None = None,
-        verbose: bool | None = True,
+        verbose: bool | None = False,
         repeats: bool | None = False,
     ):
         """Initialize a keyword. These Keywords and the value passed to them are sometimes as simple
@@ -103,7 +103,7 @@ def __str__(self):
         return (
             f"{self.name} {f'[{self.units}] ' if self.units else ''}"
             + " ".join(map(str, self.values))
-            + (" ! " + self.description if (self.description and self.verbose) else "")
+            + (f" ! {self.description}" if (self.description and self.verbose) else "")
         )
 
     def __eq__(self, other: object) -> bool:
@@ -249,7 +249,7 @@ def __init__(
         keywords: dict | None = None,
         section_parameters: list | tuple | None = None,
         location: str | None = None,
-        verbose: bool | None = True,
+        verbose: bool | None = False,
         alias: str | None = None,
         **kwargs,
     ):
@@ -516,10 +516,10 @@ def check(self, path: str):
             path (str): Path to section of form 'SUBSECTION1/SUBSECTION2/SUBSECTION_OF_INTEREST'
         """
         _path = path.split("/")
-        s = self.subsections
+        sub_secs = self.subsections
         for p in _path:
-            if tmp := [_ for _ in s if p.upper() == _.upper()]:
-                s = s[tmp[0]].subsections
+            if tmp := [_ for _ in sub_secs if p.upper() == _.upper()]:
+                sub_secs = sub_secs[tmp[0]].subsections
             else:
                 return False
         return True
@@ -535,8 +535,8 @@ def by_path(self, path: str):
         if _path[0].upper() == self.name.upper():
             _path = _path[1:]
         sec_str = self
-        for p in _path:
-            sec_str = sec_str.get_section(p)
+        for pth in _path:
+            sec_str = sec_str.get_section(pth)
         return sec_str
 
     def get_str(self) -> str:
@@ -559,15 +559,15 @@ def _get_str(d, indent=0):
             )
             string += f"\n{filled}\n"
         string += "\t" * indent + f"&{d.name}"
-        string += f" {' '.join(map(str, d.section_parameters))}\n"
+        string += f"{' '.join(map(str, ['', *d.section_parameters]))}\n"
 
-        for v in d.keywords.values():
-            if isinstance(v, KeywordList):
-                string += f"{v.get_str(indent=indent + 1)}\n"
+        for val in d.keywords.values():
+            if isinstance(val, KeywordList):
+                string += f"{val.get_str(indent=indent + 1)}\n"
             else:
-                string += "\t" * (indent + 1) + v.get_str() + "\n"
-        for v in d.subsections.values():
-            string += v._get_str(v, indent + 1)
+                string += "\t" * (indent + 1) + val.get_str() + "\n"
+        for val in d.subsections.values():
+            string += val._get_str(val, indent + 1)
         string += "\t" * indent + f"&END {d.name}\n"
 
         return string
@@ -851,14 +851,14 @@ class Dft(Section):
 
     def __init__(
         self,
-        basis_set_filenames: Iterable = ("BASIS_MOLOPT",),
-        potential_filename="GTH_POTENTIALS",
+        basis_set_filenames: Sequence[str] = ("BASIS_MOLOPT",),
+        potential_filename: str = "GTH_POTENTIALS",
         uks: bool = True,
         wfn_restart_file_name: str | None = None,
         keywords: dict | None = None,
         subsections: dict | None = None,
         **kwargs,
-    ):
+    ) -> None:
         """Initialize the DFT section.
 
         Args:
@@ -881,14 +881,11 @@ def __init__(
 
         description = "Parameter needed by dft programs"
 
+        uks_desc = "Whether to run unrestricted Kohn Sham (i.e. spin polarized)"
         _keywords = {
             "BASIS_SET_FILE_NAME": KeywordList([Keyword("BASIS_SET_FILE_NAME", k) for k in basis_set_filenames]),
             "POTENTIAL_FILE_NAME": Keyword("POTENTIAL_FILE_NAME", potential_filename),
-            "UKS": Keyword(
-                "UKS",
-                uks,
-                description="Whether to run unrestricted Kohn Sham (i.e. spin polarized)",
-            ),
+            "UKS": Keyword("UKS", uks, description=uks_desc),
         }
 
         if wfn_restart_file_name:
@@ -1320,7 +1317,7 @@ def __init__(self, lattice: Lattice, keywords: dict | None = None, **kwargs):
         """
         self.lattice = lattice
         keywords = keywords or {}
-        description = "Lattice parameters and optional settings for creating a the CELL"
+        description = "Lattice parameters and optional settings for creating the CELL"
 
         _keywords = {
             "A": Keyword("A", *lattice.matrix[0]),
@@ -1381,27 +1378,7 @@ def __init__(
         description = "The description of this kind of atom including basis sets, element, etc."
 
         # Special case for closed-shell elements. Cannot impose magnetization in CP2K.
-        closed_shell_elems = {
-            2,
-            4,
-            10,
-            12,
-            18,
-            20,
-            30,
-            36,
-            38,
-            48,
-            54,
-            56,
-            70,
-            80,
-            86,
-            88,
-            102,
-            112,
-            118,
-        }
+        closed_shell_elems = {2, 4, 10, 12, 18, 20, 30, 36, 38, 48, 54, 56, 70, 80, 86, 88, 102, 112, 118}
         if Element(self.specie).Z in closed_shell_elems:
             self.magnetization = 0
 
@@ -1665,7 +1642,7 @@ def __init__(self, keywords: dict | None = None, subsections: dict | None = None
         keywords = keywords or {}
         subsections = subsections or {}
         description = (
-            "Controls the printing of a cube file with eletrostatic potential generated by "
+            "Controls the printing of a cube file with electrostatic potential generated by "
             "the total density (electrons+ions). It is valid only for QS with GPW formalism. "
             "Note: by convention the potential has opposite sign than the expected physical one."
         )
@@ -1702,7 +1679,7 @@ def __init__(
         keywords = keywords or {}
         subsections = subsections or {}
         description = (
-            "Controls the printing of a cube file with eletrostatic potential generated by "
+            "Controls the printing of a cube file with electrostatic potential generated by "
             "the total density (electrons+ions). It is valid only for QS with GPW formalism. "
             "Note: by convention the potential has opposite sign than the expected physical one."
         )

diff --git a/src/pymatgen/io/cp2k/outputs.py b/src/pymatgen/io/cp2k/outputs.py
@@ -112,7 +112,7 @@ def run_type(self):
         return self.data.get("global").get("Run_type")
 
     @property
-    def calculation_type(self):
+    def calculation_type(self) -> str:
         """The calculation type (what io.vasp.outputs calls run_type)."""
         LDA_TYPES = {
             "LDA",
@@ -122,11 +122,8 @@ def calculation_type(self):
             "BECKE88_LR_ADIABATIC",
             "BECKE97",
         }
-
         GGA_TYPES = {"PBE", "PW92"}
-
         HYBRID_TYPES = {"BLYP", "B3LYP"}
-
         METAGGA_TYPES = {
             "TPSS": "TPSS",
             "RTPSS": "revTPSS",
@@ -139,36 +136,36 @@ def calculation_type(self):
         }
 
         functional = self.data.get("dft", {}).get("functional", [None])
-        ip = self.data.get("dft", {}).get("hfx", {}).get("Interaction_Potential")
+        inter_pot = self.data.get("dft", {}).get("hfx", {}).get("Interaction_Potential")
         frac = self.data.get("dft", {}).get("hfx", {}).get("FRACTION")
 
         if len(functional) > 1:
-            rt = "Mixed: " + ", ".join(functional)
+            run_type = "Mixed: " + ", ".join(functional)
             functional = " ".join(functional)
-            if "HYP" in functional or (ip and frac) or (functional in HYBRID_TYPES):
-                rt = "Hybrid"
+            if "HYP" in functional or (inter_pot and frac) or (functional in HYBRID_TYPES):
+                run_type = "Hybrid"
         else:
             functional = functional[0]
 
             if functional is None:
-                rt = "None"
-            elif "HYP" in functional or (ip and frac) or (functional) in HYBRID_TYPES:
-                rt = "Hybrid"
+                run_type = "None"
+            elif "HYP" in functional or (inter_pot and frac) or (functional) in HYBRID_TYPES:
+                run_type = "Hybrid"
             elif "MGGA" in functional or functional in METAGGA_TYPES:
-                rt = "METAGGA"
+                run_type = "METAGGA"
             elif "GGA" in functional or functional in GGA_TYPES:
-                rt = "GGA"
+                run_type = "GGA"
             elif "LDA" in functional or functional in LDA_TYPES:
-                rt = "LDA"
+                run_type = "LDA"
             else:
-                rt = "Unknown"
+                run_type = "Unknown"
 
         if self.is_hubbard:
-            rt += "+U"
-        if self.data.get("dft").get("vdw"):
-            rt += "+VDW"
+            run_type += "+U"
+        if self.data.get("dft", {}).get("vdw"):
+            run_type += "+VDW"
 
-        return rt
+        return run_type
 
     @property
     def project_name(self) -> str:

diff --git a/src/pymatgen/io/cp2k/sets.py b/src/pymatgen/io/cp2k/sets.py
@@ -21,8 +21,9 @@
 
 import itertools
 import os
+import typing
 import warnings
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 from ruamel.yaml import YAML
@@ -70,6 +71,7 @@
 from pymatgen.io.vasp.inputs import KpointsSupportedModes
 
 if TYPE_CHECKING:
+    from pathlib import Path
     from typing import Literal
 
 __author__ = "Nicholas Winner"
@@ -354,8 +356,13 @@ def __init__(
         if kwargs.get("validate", True):
             self.validate()
 
+    @typing.no_type_check
     @staticmethod
-    def get_basis_and_potential(structure, basis_and_potential):
+    def get_basis_and_potential(
+        structure: Structure,
+        basis_and_potential: dict[str, dict[str, Any]],
+        cp2k_data_dir: str | Path | None = None,
+    ) -> dict[str, dict[str, Any]]:
         """Get a dictionary of basis and potential info for constructing the input file.
 
         data in basis_and_potential argument can be specified in several ways:
@@ -395,7 +402,8 @@ def get_basis_and_potential(structure, basis_and_potential):
 
         Will raise an error if no basis/potential info can be found according to the input.
         """
-        data = {"basis_filenames": []}
+        cp2k_data_dir = cp2k_data_dir or SETTINGS.get("PMG_CP2K_DATA_DIR", ".")
+        data: dict[str, list[str]] = {"basis_filenames": []}
         functional = basis_and_potential.get("functional", SETTINGS.get("PMG_DEFAULT_CP2K_FUNCTIONAL"))
         basis_type = basis_and_potential.get("basis_type", SETTINGS.get("PMG_DEFAULT_CP2K_BASIS_TYPE"))
         potential_type = basis_and_potential.get(
@@ -405,18 +413,16 @@ def get_basis_and_potential(structure, basis_and_potential):
         aux_basis_type = basis_and_potential.get("aux_basis_type", SETTINGS.get("PMG_DEFAULT_CP2K_AUX_BASIS_TYPE"))
 
         for el in structure.symbol_set:
-            possible_basis_sets = []
-            possible_potentials = []
-            basis, aux_basis, potential, DATA = None, None, None, None
+            possible_basis_sets, possible_potentials = [], []
+            DATA: dict[str, dict[str, Any]] = {}
+            basis, aux_basis, potential = None, None, None
             desired_basis, desired_aux_basis, desired_potential = None, None, None
-            have_element_file = os.path.isfile(os.path.join(SETTINGS.get("PMG_CP2K_DATA_DIR", "."), el))
+            elem_file_path = os.path.join(cp2k_data_dir, el)
+            have_element_file = os.path.isfile(elem_file_path)
 
             # Necessary if matching data to CP2K data files
             if have_element_file:
-                with open(
-                    os.path.join(SETTINGS.get("PMG_CP2K_DATA_DIR", "."), el),
-                    encoding="utf-8",
-                ) as file:
+                with open(elem_file_path, encoding="utf-8") as file:
                     yaml = YAML(typ="unsafe", pure=True)
                     DATA = yaml.load(file)
                     if not DATA.get("basis_sets"):
@@ -510,16 +516,16 @@ def get_basis_and_potential(structure, basis_and_potential):
                 reverse=True,
             )
 
-            def match_elecs(x):
-                for p in possible_potentials:
-                    if x.info.electrons == p.info.electrons:
-                        return p
+            def match_elecs(basis_set):
+                for potential in possible_potentials:
+                    if basis_set.info.electrons == potential.info.electrons:
+                        return potential
                 return None
 
-            for b in possible_basis_sets:
-                fb = match_elecs(b)
+            for basis_set in possible_basis_sets:
+                fb = match_elecs(basis_set)
                 if fb is not None:
-                    basis = b
+                    basis = basis_set
                     potential = fb
                     break
 
@@ -547,7 +553,8 @@ def match_elecs(x):
             if hasattr(basis, "filename"):
                 data["basis_filenames"].append(basis.filename)
             pfn1 = data.get("potential_filename")
-            pfn2 = potential.filename
+            # use getattr to not raise an error if potential is str, not Potential object
+            pfn2 = getattr(potential, "filename", None)
             if pfn1 and pfn2 and pfn1 != pfn2:
                 raise ValueError(
                     "Provided potentials have more than one corresponding file."
@@ -556,6 +563,13 @@ def match_elecs(x):
             data["potential_filename"] = pfn2
 
             data[el] = {"basis": basis, "aux_basis": aux_basis, "potential": potential}
+
+        # if potential_filename or basis_filenames not set by above code, use the global ones
+        if not data["potential_filename"]:
+            data["potential_filename"] = basis_and_potential.get("potential_filename")
+        if not data["basis_filenames"]:
+            data["basis_filenames"] = basis_and_potential.get("basis_filenames")
+
         return data
 
     @staticmethod
@@ -677,7 +691,7 @@ def print_mo(self) -> None:
 
     def print_v_hartree(self, stride=(2, 2, 2)) -> None:
         """
-        Controls the printing of a cube file with eletrostatic potential generated by the
+        Controls the printing of a cube file with electrostatic potential generated by the
             total density (electrons+ions). It is valid only for QS with GPW formalism.
         Note that by convention the potential has opposite sign than the expected physical one.
         """