From 89a3d58411cd7ea73c71fac7937cb8ba28fd2427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= <106986430+jonathan-schoeps@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:24:52 +0100 Subject: [PATCH] Added support for Turbomole (#101) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Interface for the qm program turbomole Signed-off-by: Jonathan Schöps * support for turbomole Signed-off-by: Jonathan Schöps * corrected some typos Signed-off-by: Jonathan Schöps * changed the PARNODES command Signed-off-by: Jonathan Schöps * coord handling Signed-off-by: Jonathan Schöps * requestet changes are build in Signed-off-by: Jonathan Schöps * Implementation of the requested changes and the test of the read_mol_from_coord function Signed-off-by: Jonathan Schöps * Implementation of two pathes for turbomole Signed-off-by: Jonathan Schöps * Ignore HOMO-LUMO gap not implemented as refine engine for Orca and Trubomole Signed-off-by: Jonathan Schöps * improve TURBOMOLE configuration Signed-off-by: Marcel Müller * correct linting errors Signed-off-by: Marcel Mueller * exclude more recent interfaces from coverage check Signed-off-by: Marcel Müller * added a test for the 'write_coord_to_file' function Signed-off-by: Jonathan Schöps * sync the lokal branch with the remote branch Signed-off-by: Jonathan Schöps --------- Signed-off-by: Jonathan Schöps Signed-off-by: Marcel Müller Signed-off-by: Marcel Mueller Co-authored-by: Marcel Müller --- CHANGELOG.md | 1 + mindlessgen.toml | 16 +- pyproject.toml | 7 +- src/mindlessgen/generator/main.py | 41 +++- src/mindlessgen/molecules/molecule.py | 155 ++++++++++++- src/mindlessgen/molecules/refinement.py | 3 + src/mindlessgen/prog/__init__.py | 2 + src/mindlessgen/prog/config.py | 111 ++++++++- src/mindlessgen/qm/__init__.py | 4 + src/mindlessgen/qm/tm.py | 286 ++++++++++++++++++++++++ test/test_molecules/test_molecule.py | 76 +++++++ 11 files changed, 687 insertions(+), 15 deletions(-) create mode 100644 src/mindlessgen/qm/tm.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 98b8c43..8be9cb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - support for `python-3.13` - option to set a fixed molecular charge, while ensuring `uhf = 0` - `element_composition` and `forbidden_elements` can now be directly set to a `dict` or `list`, respectively, via API access +- support for TURBOMOLE as QM engine. ### Breaking Changes - Removal of the `dist_threshold` flag and in the `-toml` file. diff --git a/mindlessgen.toml b/mindlessgen.toml index 8151b53..722aa5f 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -51,7 +51,7 @@ molecular_charge = "none" [refine] # > Maximum number of fragment optimization cycles. Options: max_frag_cycles = 10 -# > Quantum Mechanics (QM) engine to use. Options: 'xtb', 'orca' +# > Quantum Mechanics (QM) engine to use. Options: 'xtb', 'orca'. 'turbomole' engine = "xtb" # > HOMO-LUMO gap threshold applied at the end of the refinement step hlgap = 0.5 @@ -60,7 +60,7 @@ hlgap = 0.5 debug = false [postprocess] -# > Engine for the post-processing part. Options: 'xtb', 'orca' +# > Engine for the post-processing part. Options: 'xtb', 'orca', 'turbomole' engine = "orca" # > Optimize geometry in the post-processing part. If `false`, only a single-point is conducted. Options: optimize = true @@ -88,3 +88,15 @@ basis = "def2-SVP" gridsize = 1 # > Maximum number of SCF cycles: Options: scf_cycles = 100 + +[turbomole] +# > Path to the ridft executable. The name `ridft` is automatically searched for. Options: +ridft_path = "/path/to/ridft" +# > Path to the jobex executable. The name `jobex` is automatically searched for. Options: +jobex_path = "/path/to/jobex" +# > Functional/Method: Options: +functional = "PBE" +# > Basis set: Options: +basis = "def2-SVP" +# > Maximum number of SCF cycles: Options: +scf_cycles = 100 diff --git a/pyproject.toml b/pyproject.toml index 9e28d00..fc0b69d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,12 @@ markers = "optional: mark test as optional" plugins = ["covdefaults"] source = ["./src"] # Exclude interfaces to external programs from coverage -omit = ["src/mindlessgen/qm/xtb.py", "src/mindlessgen/qm/orca.py"] +omit = [ + "src/mindlessgen/qm/xtb.py", + "src/mindlessgen/qm/orca.py", + "src/mindlessgen/qm/tm.py", + "src/mindlessgen/qm/gxtb.py", +] [tool.coverage.report] fail_under = 50 diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index 5094f8a..1854d38 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -10,7 +10,18 @@ import warnings from ..molecules import generate_random_molecule, Molecule -from ..qm import XTB, get_xtb_path, QMMethod, ORCA, get_orca_path, GXTB, get_gxtb_path +from ..qm import ( + XTB, + get_xtb_path, + QMMethod, + ORCA, + get_orca_path, + Turbomole, + get_ridft_path, + get_jobex_path, + GXTB, + get_gxtb_path, +) from ..molecules import iterative_optimization, postprocess_mol from ..prog import ConfigManager @@ -46,6 +57,8 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: config, get_xtb_path, get_orca_path, # g-xTB cannot be used anyway + get_ridft_path, + get_jobex_path, ) if config.general.postprocess: @@ -54,6 +67,8 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: config, get_xtb_path, get_orca_path, + get_ridft_path, + get_jobex_path, get_gxtb_path, ) else: @@ -77,12 +92,12 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: for molcount in range(config.general.num_molecules): # print a decent header for each molecule iteration if config.general.verbosity > 0: - print(f"\n{'='*80}") + print(f"\n{'=' * 80}") print( - f"{'='*22} Generating molecule {molcount + 1:<4} of " - + f"{config.general.num_molecules:<4} {'='*24}" + f"{'=' * 22} Generating molecule {molcount + 1:<4} of " + + f"{config.general.num_molecules:<4} {'=' * 24}" ) - print(f"{'='*80}") + print(f"{'=' * 80}") manager = mp.Manager() stop_event = manager.Event() cycles = range(config.general.max_cycles) @@ -270,6 +285,8 @@ def setup_engines( cfg: ConfigManager, xtb_path_func: Callable, orca_path_func: Callable, + ridft_path_func: Callable, + jobex_path_func: Callable, gxtb_path_func: Callable | None = None, ): """ @@ -291,6 +308,20 @@ def setup_engines( except ImportError as e: raise ImportError("orca not found.") from e return ORCA(path, cfg.orca) + elif engine_type == "turbomole": + try: + jobex_path = jobex_path_func(cfg.turbomole.jobex_path) + if not jobex_path: + raise ImportError("jobex not found.") + except ImportError as e: + raise ImportError("jobex not found.") from e + try: + ridft_path = ridft_path_func(cfg.turbomole.ridft_path) + if not ridft_path: + raise ImportError("ridft not found.") + except ImportError as e: + raise ImportError("ridft not found.") from e + return Turbomole(jobex_path, ridft_path, cfg.turbomole) elif engine_type == "gxtb": if gxtb_path_func is None: raise ImportError("No callable function for determining the g-xTB path.") diff --git a/src/mindlessgen/molecules/molecule.py b/src/mindlessgen/molecules/molecule.py index 4928a5b..7b23d57 100644 --- a/src/mindlessgen/molecules/molecule.py +++ b/src/mindlessgen/molecules/molecule.py @@ -134,6 +134,11 @@ PSE_NUMBERS: dict[str, int] = {k.lower(): v for v, k in PSE.items()} PSE_SYMBOLS: dict[int, str] = {v: k.lower() for v, k in PSE.items()} +BOHR2AA = ( + 0.529177210544 # taken from https://physics.nist.gov/cgi-bin/cuu/Value?bohrrada0 +) +AA2BOHR = 1 / BOHR2AA + class Molecule: """ @@ -264,6 +269,42 @@ def read_mol_from_file(file: str | Path) -> Molecule: molecule.name = file_path.stem return molecule + @staticmethod + def read_mol_from_coord(file: str | Path) -> Molecule: + """ + Read the XYZ coordinates and the charge of the molecule from a 'coord' file. + Thereby, generate a completely new molecule object from scratch. + + Can be called like this: + from molecule import Molecule + # Call the static method using the class name + coord_file = "coord" + molecule_instance = Molecule.read_mol_from_coord(coord_file) + # Now you can use the molecule_instance as needed + print(molecule_instance.name) + + :param file: The 'coord' file to read from. + :return: A new instance of Molecule with the read data. + """ + molecule = Molecule() + if isinstance(file, str): + file_path = Path(file).resolve() + elif isinstance(file, Path): + file_path = file.resolve() + else: + raise TypeError("String or Path expected.") + molecule.read_xyz_from_coord(file_path) + if file_path.with_suffix(".CHRG").exists(): + molecule.read_charge_from_file(file_path.with_suffix(".CHRG")) + else: + molecule.charge = 0 + if file_path.with_suffix(".UHF").exists(): + molecule.read_uhf_from_file(file_path.with_suffix(".UHF")) + else: + molecule.uhf = 0 + molecule.name = file_path.stem + return molecule + @property def name(self) -> str: """ @@ -480,7 +521,7 @@ def get_xyz_str(self) -> str: xyz_str += commentline for i in range(self.num_atoms): xyz_str += ( - f"{PSE[self.ati[i]+1]:<5} " + f"{PSE[self.ati[i] + 1]:<5} " + f"{self.xyz[i, 0]:>12.7f} " + f"{self.xyz[i, 1]:>12.7f} " + f"{self.xyz[i, 2]:>12.7f}\n" @@ -558,6 +599,118 @@ def read_xyz_from_file(self, filename: str | Path) -> None: self.xyz[i, 2] = float(line[3]) self.atlist[self.ati[i]] += 1 + def get_coord_str(self) -> str: + """ + Obtain a string with the full 'coord' file information of the molecule. + """ + coord_str = "$coord\n" + for i in range(self.num_atoms): + coord_str += ( + f"{self.xyz[i, 0] / BOHR2AA:>12.7f} " + + f"{self.xyz[i, 1] / BOHR2AA:>12.7f} " + + f"{self.xyz[i, 2] / BOHR2AA:>12.7f} " + + f"{PSE[self.ati[i] + 1]:>5}\n" + ) + coord_str += "$end\n" + return coord_str + + def write_coord_to_file(self, filename: str | Path | None = None): + """ + Write the 'coord' file of the molecule to a file. + + The layout of the file is as follows: + ``` + $coord + + + ... + $end + ``` + + :param filename: The name of the file to write to. + """ + # raise an error if the number of atoms is not set + if self._num_atoms is None: + raise ValueError("Number of atoms not set.") + if not self._ati.size: + raise ValueError("Atomic numbers not set.") + if not self._xyz.size: + raise ValueError("Atomic coordinates not set.") + + if filename: + if not isinstance(filename, Path): + filename = Path(filename).resolve() + else: + filename = Path("mlm_" + self.name + ".coord").resolve() + + with open(filename, "w", encoding="utf8") as f: + f.write(self.get_coord_str()) + # if the charge is set, write it to a '.CHRG' file + if self._charge is not None and self._charge != 0: + with open(filename.with_suffix(".CHRG"), "w", encoding="utf8") as f: + f.write(f"{self.charge}\n") + # if the UHF is set, write it to a '.UHF' file + if self._uhf is not None and self._uhf > 0: + with open(filename.with_suffix(".UHF"), "w", encoding="utf8") as f: + f.write(f"{self.uhf}\n") + + def read_xyz_from_coord(self, filename: str | Path) -> None: + """ + Read the XYZ coordinates of the molecule from a 'coord' file. + + The layout of the file is as follows: + ``` + $coord + 0.00000000000000 0.00000000000000 3.60590687077610 u + 0.00000000000000 0.00000000000000 -3.60590687077610 u + $end + ... or ... + $coord + 0.00000000000000 0.00000000000000 2.30704866281983 u + 0.00000000000000 0.00000000000000 -2.30704866281983 u + $redundant + number_of_atoms 2 + degrees_of_freedom 1 + internal_coordinates 1 + frozen_coordinates 0 + # definitions of redundant internals + 1 k 1.0000000000000 stre 1 2 val= 4.61410 + 1 non zero eigenvalues of BmBt + 1 2.000000000 1 0 + 1 + $end + + :param filename: The name of the file to read from. + """ + with open(filename, encoding="utf8") as f: + lines = f.readlines() + if lines[0] != "$coord\n": + raise ValueError("File does not start with '$coord'.") + # number of atoms + num_atoms = 0 + for line in lines: + if line.startswith("$coord"): + continue + if ( + line.startswith("$end") + or line.startswith("$redundant") + or line.startswith("$user-defined") + ): + break + num_atoms += 1 + self.num_atoms = num_atoms + # read the atomic coordinates + self.xyz = np.zeros((self.num_atoms, 3)) + self.ati = np.zeros(self.num_atoms, dtype=int) + self.atlist = np.zeros(103, dtype=int) + for i in range(self.num_atoms): + line_entries = lines[i + 1].split() + self.xyz[i, 0] = float(line_entries[0]) * BOHR2AA + self.xyz[i, 1] = float(line_entries[1]) * BOHR2AA + self.xyz[i, 2] = float(line_entries[2]) * BOHR2AA + self.ati[i] = PSE_NUMBERS[line_entries[3].lower()] - 1 + self.atlist[self.ati[i]] += 1 + def read_charge_from_file(self, filename: str | Path): """ Read the charge of the molecule from a file. diff --git a/src/mindlessgen/molecules/refinement.py b/src/mindlessgen/molecules/refinement.py index a520ebd..6bd415a 100644 --- a/src/mindlessgen/molecules/refinement.py +++ b/src/mindlessgen/molecules/refinement.py @@ -3,6 +3,7 @@ to obtain finally a valid molecule. """ +import warnings from pathlib import Path import networkx as nx # type: ignore import numpy as np @@ -151,6 +152,8 @@ def iterative_optimization( gap_sufficient = engine.check_gap( molecule=rev_mol, threshold=config_refine.hlgap, verbosity=verbosity ) + except NotImplementedError: + warnings.warn("HOMO-LUMO gap check not implemented with this engine.") except RuntimeError as e: raise RuntimeError("HOMO-LUMO gap could not be checked.") from e if not gap_sufficient: diff --git a/src/mindlessgen/prog/__init__.py b/src/mindlessgen/prog/__init__.py index 3ccbccb..e86adc0 100644 --- a/src/mindlessgen/prog/__init__.py +++ b/src/mindlessgen/prog/__init__.py @@ -7,6 +7,7 @@ GeneralConfig, XTBConfig, ORCAConfig, + TURBOMOLEConfig, GXTBConfig, GenerateConfig, RefineConfig, @@ -18,6 +19,7 @@ "GeneralConfig", "XTBConfig", "ORCAConfig", + "TURBOMOLEConfig", "GXTBConfig", "GenerateConfig", "RefineConfig", diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 31cd626..37af6c8 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -347,7 +347,7 @@ def element_composition( tmp[key] = composition[key] else: raise KeyError( - f"Element with atomic number {key+1} (provided key: {key}) not found in the periodic table." + f"Element with atomic number {key + 1} (provided key: {key}) not found in the periodic table." ) self._element_composition = tmp return @@ -656,8 +656,8 @@ def engine(self, engine: str): """ if not isinstance(engine, str): raise TypeError("Refinement engine should be a string.") - if engine not in ["xtb", "orca"]: - raise ValueError("Refinement engine can only be xtb or orca.") + if engine not in ["xtb", "orca", "turbomole"]: + raise ValueError("Refinement engine can only be xtb, orca or turbomole.") self._engine = engine @property @@ -723,8 +723,8 @@ def engine(self, engine: str): """ if not isinstance(engine, str): raise TypeError("Postprocess engine should be a string.") - if engine not in ["xtb", "orca", "gxtb"]: - raise ValueError("Postprocess engine can only be xtb or orca.") + if engine not in ["xtb", "orca", "gxtb", "turbomole"]: + raise ValueError("Postprocess engine can only be xtb, orca or turbomole.") self._engine = engine @property @@ -925,6 +925,104 @@ def scf_cycles(self, max_scf_cycles: int): self._scf_cycles = max_scf_cycles +class TURBOMOLEConfig(BaseConfig): + """ + Configuration class for TURBOMOLE. + """ + + def __init__(self: TURBOMOLEConfig) -> None: + self._ridft_path: str | Path = "ridft" + self._jobex_path: str | Path = "jobex" + self._functional: str = "pbe" + self._basis: str = "def2-SVP" + self._scf_cycles: int = 100 + + def get_identifier(self) -> str: + return "turbomole" + + @property + def ridft_path(self): + """ + Get the ridft path. + """ + return self._ridft_path + + @ridft_path.setter + def ridft_path(self, ridft_path: str | Path): + """ + Set the ridft path. + """ + if not isinstance(ridft_path, str | Path): + raise TypeError("ridft_path should be a string or Path.") + self._ridft_path = ridft_path + + @property + def jobex_path(self): + """ + Get the jobex path. + """ + return self._jobex_path + + @jobex_path.setter + def jobex_path(self, jobex_path: str | Path): + """ + Set the jobex path. + """ + if not isinstance(jobex_path, str | Path): + raise TypeError("jobex_path should be a string or Path.") + self._jobex_path = jobex_path + + @property + def functional(self): + """ + Get the TURBOMOLE functional/method. + """ + return self._functional + + @functional.setter + def functional(self, functional: str): + """ + Set the TURBOMOLE functional/method. + """ + if not isinstance(functional, str): + raise TypeError("Functional should be a string.") + self._functional = functional + + @property + def basis(self): + """ + Get the TURBOMOLE basis set. + """ + return self._basis + + @basis.setter + def basis(self, basis: str): + """ + Set the TURBOMOLE basis set. + """ + if not isinstance(basis, str): + raise TypeError("Basis should be a string.") + self._basis = basis + + @property + def scf_cycles(self): + """ + Get the maximum number of SCF cycles. + """ + return self._scf_cycles + + @scf_cycles.setter + def scf_cycles(self, max_scf_cycles: int): + """ + Set the maximum number of SCF cycles. + """ + if not isinstance(max_scf_cycles, int): + raise TypeError("Max SCF cycles should be an integer.") + if max_scf_cycles < 1: + raise ValueError("Max SCF cycles should be greater than 0.") + self._scf_cycles = max_scf_cycles + + class GXTBConfig(BaseConfig): """ Configuration class for g-xTB. @@ -984,6 +1082,7 @@ def __init__(self, config_file: str | Path | None = None): self.general = GeneralConfig() self.xtb = XTBConfig() self.orca = ORCAConfig() + self.turbomole = TURBOMOLEConfig() self.refine = RefineConfig() self.postprocess = PostProcessConfig() self.generate = GenerateConfig() @@ -1096,7 +1195,7 @@ def load_from_dict(self, config_dict: dict) -> None: }, "orca": { "orca_option": "opt" - } + }, } Arguments: diff --git a/src/mindlessgen/qm/__init__.py b/src/mindlessgen/qm/__init__.py index 32237f8..f16adcc 100644 --- a/src/mindlessgen/qm/__init__.py +++ b/src/mindlessgen/qm/__init__.py @@ -5,6 +5,7 @@ from .base import QMMethod from .xtb import XTB, get_xtb_path from .orca import ORCA, get_orca_path +from .tm import Turbomole, get_ridft_path, get_jobex_path from .gxtb import GXTB, get_gxtb_path __all__ = [ @@ -13,6 +14,9 @@ "QMMethod", "ORCA", "get_orca_path", + "Turbomole", + "get_ridft_path", + "get_jobex_path", "GXTB", "get_gxtb_path", ] diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py new file mode 100644 index 0000000..c75806e --- /dev/null +++ b/src/mindlessgen/qm/tm.py @@ -0,0 +1,286 @@ +""" +This module handles all Turbomole-related functionality. +""" + +from pathlib import Path +import shutil +import subprocess as sp +from tempfile import TemporaryDirectory + +from ..molecules import Molecule +from ..prog import TURBOMOLEConfig +from .base import QMMethod + + +class Turbomole(QMMethod): + """ + This class handles all interaction with the turbomole external dependency. + """ + + def __init__( + self, + jobex_path: str | Path, + ridft_path: str | Path, + turbomolecfg: TURBOMOLEConfig, + ) -> None: + """ + Initialize the turbomole class. + """ + if isinstance(jobex_path, str): + self.jobex_path: Path = Path(jobex_path).resolve() + elif isinstance(jobex_path, Path): + self.jobex_path = jobex_path + else: + raise TypeError("jobex_path should be a string or a Path object.") + + if isinstance(ridft_path, str): + self.ridft_path: Path = Path(ridft_path).resolve() + elif isinstance(ridft_path, Path): + self.ridft_path = ridft_path + else: + raise TypeError("ridft_path should be a string or a Path object.") + self.cfg = turbomolecfg + + def optimize( + self, + molecule: Molecule, + max_cycles: int | None = None, + verbosity: int = 1, + ) -> Molecule: + """ + Optimize a molecule using Turbomole. + """ + # Create a unique temporary directory using TemporaryDirectory context manager + with TemporaryDirectory(prefix="turbomole_") as temp_dir: + temp_path = Path(temp_dir).resolve() + # write the molecule to a temporary file + molecule.write_coord_to_file(temp_path / "coord") + + # write the input file + inputname = "control" + tm_input = self._gen_input(molecule) + if verbosity > 1: + print("Turbomole input file:\n##################") + print(tm_input) + print("##################") + with open(temp_path / inputname, "w", encoding="utf8") as f: + f.write(tm_input) + + # Setup the turbomole optimization command including the max number of optimization cycles + arguments = [f"PARNODES=1 {self.jobex_path} -c {max_cycles}"] + if verbosity > 2: + print(f"Running command: {' '.join(arguments)}") + + tm_log_out, tm_log_err, return_code = self._run_opt( + temp_path=temp_path, arguments=arguments + ) + if verbosity > 2: + print(tm_log_out) + if return_code != 0: + raise RuntimeError( + f"Turbomole failed with return code {return_code}:\n{tm_log_err}" + ) + + # # revert the coord file to xyz file + coordfile = Path(temp_path / "coord").resolve() + optimized_molecule = molecule.copy() + optimized_molecule.read_xyz_from_coord(coordfile) + return optimized_molecule + + def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: + """ + Perform a single point calculation using Turbomole. + """ + # Create a unique temporary directory using TemporaryDirectory context manager + with TemporaryDirectory(prefix="turbomole_") as temp_dir: + temp_path = Path(temp_dir).resolve() + # write the molecule to a temporary file + molfile = "coord" + molecule.write_coord_to_file(temp_path / molfile) + + # write the input file + inputname = "control" + tm_input = self._gen_input(molecule) + if verbosity > 1: + print("Turbomole input file:\n##################") + print(self._gen_input(molecule)) + print("##################") + with open(temp_path / inputname, "w", encoding="utf8") as f: + f.write(tm_input) + + # set up the turbomole single point calculation command + run_tm = [f"PARNODES=1 {self.ridft_path}"] + if verbosity > 2: + print(f"Running command: {' '.join(run_tm)}") + + tm_log_out, tm_log_err, return_code = self._run( + temp_path=temp_path, arguments=run_tm + ) + if verbosity > 2: + print(tm_log_out) + if return_code != 0: + raise RuntimeError( + f"Turbomole failed with return code {return_code}:\n{tm_log_err}" + ) + + return tm_log_out + + def check_gap( + self, molecule: Molecule, threshold: float, verbosity: int = 1 + ) -> bool: + """ + Check if the HL gap is larger than a given threshold. + """ + raise NotImplementedError("check_gap not implemented for turbomole.") + + def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: + """ + Run turbomole with the given arguments. + + Arguments: + arguments (list[str]): The arguments to pass to turbomole. + + Returns: + tuple[str, str, int]: The output of the turbomole calculation (stdout and stderr) + and the return code + """ + try: + tm_out = sp.run( + arguments, + cwd=temp_path, + capture_output=True, + check=True, + shell=True, # shell=True is necessary for inserting the `PARNODES=xx` command, unfortunately. + ) + # get the output of the turbomole calculation (of both stdout and stderr) + tm_log_out = tm_out.stdout.decode("utf8", errors="replace") + tm_log_err = tm_out.stderr.decode("utf8", errors="replace") + + if "ridft : all done" not in tm_log_out: + raise sp.CalledProcessError( + 1, + str(self.ridft_path), + tm_log_out.encode("utf8"), + tm_log_err.encode("utf8"), + ) + return tm_log_out, tm_log_err, 0 + except sp.CalledProcessError as e: + tm_log_out = e.stdout.decode("utf8", errors="replace") + tm_log_err = e.stderr.decode("utf8", errors="replace") + return tm_log_out, tm_log_err, e.returncode + + def _run_opt(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: + """ + Run turbomole optimization with the given arguments. + + Arguments: + arguments (list[str]): The arguments to pass to turbomole. + + Returns: + tuple[str, str, int]: The output of the turbomole calculation (stdout and stderr) + and the return code + """ + try: + tm_out = sp.run( + arguments, + cwd=temp_path, + capture_output=True, + check=True, + shell=True, # shell=True is necessary for inserting the `PARNODES=xx` command, unfortunately. + ) + # Read the job-last file to get the output of the calculation + output_file = temp_path / "job.last" + if output_file.exists(): + with open(output_file, encoding="utf-8") as file: + tm_log_out = file.read() + tm_log_err = tm_out.stderr.decode("utf8", errors="replace") + else: + raise FileNotFoundError(f"Output file {output_file} not found.") + + if "ridft : all done" not in tm_log_out: + raise sp.CalledProcessError( + 1, + str(self.jobex_path), + tm_log_out, + tm_log_err.encode("utf8"), + ) + return tm_log_out, tm_log_err, 0 + except sp.CalledProcessError as e: + tm_log_out = e.stdout.decode("utf8", errors="replace") + tm_log_err = e.stderr.decode("utf8", errors="replace") + return tm_log_out, tm_log_err, e.returncode + + def _gen_input( + self, + molecule: Molecule, + ) -> str: + """ + Generate a default input file for Turbomole. + """ + tm_input = "$coord file=coord\n" + tm_input += f"$charge={molecule.charge} unpaired={molecule.uhf}\n" + tm_input += "$atoms\n" + tm_input += f" basis={self.cfg.basis}\n" + tm_input += "$dft\n" + tm_input += f" functional {self.cfg.functional}\n" + tm_input += " gridsize m4\n" + tm_input += "$rij\n" + if self.cfg.functional.endswith("-v"): + tm_input += "$doscnl\n" + else: + tm_input += "$disp4\n" + tm_input += f"$scfiterlimit {self.cfg.scf_cycles}\n" + tm_input += "$scfconv 7\n" + tm_input += "$energy file=energy\n" + tm_input += "$grad file=gradient\n" + tm_input += "$end" + return tm_input + + +# TODO: 1. Convert this to a @staticmethod of Class turbomole +# 2. Rename to `get_method` or similar to enable an abstract interface +# 3. Add the renamed method to the ABC `QMMethod` +# 4. In `main.py`: Remove the passing of the path finder functions as arguments +# and remove the boiler plate code to make it more general. +def get_ridft_path(binary_name: str | Path | None = None) -> Path: + """ + Retrieve the path to the Turbomole 'ridft' binary. + + Returns: + Path: The absolute path to the 'ridft' binary. + """ + default_ridft_names: list[str | Path] = ["ridft"] + # put binary name at the beginning of the list to prioritize it + if binary_name is not None: + binary_names = [binary_name] + default_ridft_names + else: + binary_names = default_ridft_names + # Get turbomole path from 'which ridft' command + for binpath in binary_names: + which_ridft = shutil.which(binpath) + if which_ridft: + ridft_path = Path(which_ridft) + return ridft_path + raise ImportError("'ridft' (TURBOMOLE DFT) binary could not be found.") + + +def get_jobex_path(binary_name: str | Path | None = None) -> Path: + """ + Retrieve the path to the Turbomole 'jobex' binary. + + Returns: + Path: The absolute path to the 'jobex' binary. + """ + default_jobex_names: list[str | Path] = ["jobex"] + # put binary name at the beginning of the list to prioritize it + if binary_name is not None: + binary_names = [binary_name] + default_jobex_names + else: + binary_names = default_jobex_names + # Get turbomole path from 'which jobex' command + for binpath in binary_names: + which_jobex = shutil.which(binpath) + if which_jobex: + jobex_path = Path(which_jobex).resolve() + return jobex_path + raise ImportError("'jobex' (TURBOMOLE) binary could not be found.") diff --git a/test/test_molecules/test_molecule.py b/test/test_molecules/test_molecule.py index 2bd1a4d..f3ff5df 100644 --- a/test/test_molecules/test_molecule.py +++ b/test/test_molecules/test_molecule.py @@ -198,6 +198,82 @@ def test_read_mol_from_file(tmp_path, filename, num_atoms, charge, expected_exce ) +@pytest.mark.parametrize( + "filename, num_atoms, charge, expected_exception", + [ + ("test_molecule", 2, 1, None), # Valid file and data + ("invalid_molecule", None, None, FileNotFoundError), # Missing file + ("corrupted_molecule", None, None, ValueError), # Invalid data + ], +) +def test_read_xyz_from_coord(tmp_path, filename, num_atoms, charge, expected_exception): + if filename == "test_molecule": + # Create valid XYZ and CHRG files for testing + coord_content = "$coord\n0.0 0.0 0.0 H\n1.8897261259077822 0.0 0.0 O\n$end\n" + chrg_content = "1\n" + elif filename == "corrupted_molecule": + # Create an invalid XYZ file + coord_content = "Invalid content\n" + chrg_content = "invalid\n" + else: + # Simulate missing file by skipping file creation + coord_content = None + chrg_content = None + + if coord_content and chrg_content: + coord_file = tmp_path / f"{filename}" + chrg_file = tmp_path / f"{filename}.CHRG" + coord_file.write_text(coord_content) + chrg_file.write_text(chrg_content) + + if filename == "corrupted_molecule": + print(f"expected_exception: {expected_exception}") + with pytest.raises(expected_exception): + Molecule.read_mol_from_coord(str(tmp_path / coord_file)) + elif filename == "invalid_molecule": + print(f"expected_exception: {expected_exception}") + with pytest.raises(expected_exception): + # load from "filename", which does not exist -> FileNotFoundError + Molecule.read_mol_from_coord(str(tmp_path / filename)) + else: + mol = Molecule.read_mol_from_coord(str(tmp_path / coord_file)) + assert mol.num_atoms == num_atoms + assert mol.charge == charge + assert mol.uhf == 0 + np.testing.assert_array_equal(mol.ati, np.array([0, 7])) + assert mol.atlist[0] == 1 + assert mol.atlist[7] == 1 + np.testing.assert_array_equal( + mol.xyz, np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]) + ) + + +def test_write_coord_to_file(tmp_path): + mol = Molecule(name="test") + mol.num_atoms = 2 + mol.ati = np.array([0, 8], dtype=int) + mol.xyz = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]) + + expected_content = """$coord +0.0000000 0.0000000 0.0000000 H +1.8897261 0.0000000 0.0000000 F +$end +""" + + mol.write_coord_to_file(str(tmp_path / "test.coord")) + + assert (tmp_path / "test.coord").exists() + + # Lese den Inhalt der Datei und entferne überflüssige Leerzeichen für den Vergleich + actual_content = (tmp_path / "test.coord").read_text().strip() + normalized_actual = " ".join(actual_content.split()) + normalized_expected = " ".join(expected_content.split()) + + assert ( + normalized_actual == normalized_expected + ), f"Actual content:\n{actual_content}\nExpected content:\n{expected_content}" + + def test_copy_method(): mol = Molecule(name="original") mol.num_atoms = 2