From f6a32827a8c1885fa954bea313898724eff284b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Wed, 18 Dec 2024 13:00:31 +0100 Subject: [PATCH 01/14] Interface for the qm program turbomole MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- mindlessgen.toml | 10 +- src/mindlessgen/generator/main.py | 22 ++- src/mindlessgen/prog/__init__.py | 2 + src/mindlessgen/prog/config.py | 95 +++++++++++- src/mindlessgen/qm/__init__.py | 3 + src/mindlessgen/qm/tm.py | 239 ++++++++++++++++++++++++++++++ 6 files changed, 365 insertions(+), 6 deletions(-) create mode 100644 src/mindlessgen/qm/tm.py diff --git a/mindlessgen.toml b/mindlessgen.toml index 8151b53..f0b1761 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -60,7 +60,7 @@ hlgap = 0.5 debug = false [postprocess] -# > Engine for the post-processing part. Options: 'xtb', 'orca' +# > Engine for the post-processing part. Options: 'xtb', 'orca', 'turbomole' engine = "orca" # > Optimize geometry in the post-processing part. If `false`, only a single-point is conducted. Options: optimize = true @@ -88,3 +88,11 @@ basis = "def2-SVP" gridsize = 1 # > Maximum number of SCF cycles: Options: scf_cycles = 100 + +[turbomole] +# > Path to the turbomole executable. The names `turbomole` and `turbomole_dev` are automatically searched for. Options: +turbomole_path = "/path/to/turbomole" +# > Functional/Method: Options: +functional = "PBE" +# > Basis set: Options: +basis = "def2-SVP" diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index 8a36a54..c4a953c 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -10,7 +10,17 @@ import warnings from ..molecules import generate_random_molecule, Molecule -from ..qm import XTB, get_xtb_path, QMMethod, ORCA, get_orca_path, GP3, get_gp3_path +from ..qm import ( + XTB, + get_xtb_path, + QMMethod, + ORCA, + get_orca_path, + GP3, + get_gp3_path, + Turbomole, + get_turbomole_path, +) from ..molecules import iterative_optimization, postprocess_mol from ..prog import ConfigManager @@ -46,6 +56,7 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: config, get_xtb_path, get_orca_path, # GP3 cannot be used anyway + get_turbomole_path, ) if config.general.postprocess: @@ -266,6 +277,7 @@ def setup_engines( cfg: ConfigManager, xtb_path_func: Callable, orca_path_func: Callable, + turbomole_path_func: Callable, gp3_path_func: Callable | None = None, ): """ @@ -287,6 +299,14 @@ def setup_engines( except ImportError as e: raise ImportError("orca not found.") from e return ORCA(path, cfg.orca) + elif engine_type == "turbomole": + try: + path = turbomole_path_func(cfg.turbomole.turbomole_path) + if not path: + raise ImportError("turbomole not found.") + except ImportError as e: + raise ImportError("turbomole not found.") from e + return Turbomole(path, cfg.turbomole) elif engine_type == "gp3": if gp3_path_func is None: raise ImportError("No callable function for determining the gp3 path.") diff --git a/src/mindlessgen/prog/__init__.py b/src/mindlessgen/prog/__init__.py index 8dee820..0f0810c 100644 --- a/src/mindlessgen/prog/__init__.py +++ b/src/mindlessgen/prog/__init__.py @@ -7,6 +7,7 @@ GeneralConfig, XTBConfig, ORCAConfig, + TURBOMOLEConfig, GenerateConfig, RefineConfig, PostProcessConfig, @@ -17,6 +18,7 @@ "GeneralConfig", "XTBConfig", "ORCAConfig", + "TURBOMOLEConfig", "GenerateConfig", "RefineConfig", "PostProcessConfig", diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index d598b4a..591fbb0 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -626,8 +626,8 @@ def engine(self, engine: str): """ if not isinstance(engine, str): raise TypeError("Refinement engine should be a string.") - if engine not in ["xtb", "orca"]: - raise ValueError("Refinement engine can only be xtb or orca.") + if engine not in ["xtb", "orca", "turbomole"]: + raise ValueError("Refinement engine can only be xtb, orca or turbomole.") self._engine = engine @property @@ -693,8 +693,8 @@ def engine(self, engine: str): """ if not isinstance(engine, str): raise TypeError("Postprocess engine should be a string.") - if engine not in ["xtb", "orca", "gp3"]: - raise ValueError("Postprocess engine can only be xtb or orca.") + if engine not in ["xtb", "orca", "gp3", "turbomole"]: + raise ValueError("Postprocess engine can only be xtb, orca turbomole.") self._engine = engine @property @@ -895,6 +895,86 @@ def scf_cycles(self, max_scf_cycles: int): self._scf_cycles = max_scf_cycles +class TURBOMOLEConfig(BaseConfig): + """ + Configuration class for TURBOMOLE. + """ + + def __init__(self: TURBOMOLEConfig) -> None: + self._turbomole_path: str | Path = "turbomole" + self._functional: str = "PBE" + self._basis: str = "def2-SVP" + self._scf_cycles: int = 100 + + def get_identifier(self) -> str: + return "turbomole" + + @property + def turbomole_path(self): + """ + Get the turbomole path. + """ + return self._turbomole_path + + @turbomole_path.setter + def turbomole_path(self, turbomole_path: str | Path): + """ + Set the turbomole path. + """ + if not isinstance(turbomole_path, str | Path): + raise TypeError("turbomole_path should be a string or Path.") + self._turbomole_path = turbomole_path + + @property + def functional(self): + """ + Get the TURBOMOLE functional/method. + """ + return self._functional + + @functional.setter + def functional(self, functional: str): + """ + Set the TURBOMOLE functional/method. + """ + if not isinstance(functional, str): + raise TypeError("Functional should be a string.") + self._functional = functional + + @property + def basis(self): + """ + Get the TURBOMOLE basis set. + """ + return self._basis + + @basis.setter + def basis(self, basis: str): + """ + Set the TURBOMOLE basis set. + """ + if not isinstance(basis, str): + raise TypeError("Basis should be a string.") + self._basis = basis + + @property + def scf_cycles(self): + """ + Get the maximum number of SCF cycles. + """ + return self._scf_cycles + + @scf_cycles.setter + def scf_cycles(self, max_scf_cycles: int): + """ + Set the maximum number of SCF cycles. + """ + if not isinstance(max_scf_cycles, int): + raise TypeError("Max SCF cycles should be an integer.") + if max_scf_cycles < 1: + raise ValueError("Max SCF cycles should be greater than 0.") + + class ConfigManager: """ Overall configuration manager for the program. @@ -907,6 +987,7 @@ def __init__(self, config_file: str | Path | None = None): self.general = GeneralConfig() self.xtb = XTBConfig() self.orca = ORCAConfig() + self.turbomole = TURBOMOLEConfig() self.refine = RefineConfig() self.postprocess = PostProcessConfig() self.generate = GenerateConfig() @@ -996,6 +1077,9 @@ def load_from_toml(self, config_file: str | Path) -> None: [orca] orca_option = "opt" + [turbomole] + turbomole_option = "opt" + Arguments: config_file (str): Path to the configuration file @@ -1018,6 +1102,9 @@ def load_from_dict(self, config_dict: dict) -> None: }, "orca": { "orca_option": "opt" + }, + "turbomole": { + "turbomole_option": "opt" } } diff --git a/src/mindlessgen/qm/__init__.py b/src/mindlessgen/qm/__init__.py index a5e8e3a..e21ef44 100644 --- a/src/mindlessgen/qm/__init__.py +++ b/src/mindlessgen/qm/__init__.py @@ -6,6 +6,7 @@ from .xtb import XTB, get_xtb_path from .orca import ORCA, get_orca_path from .gp3 import GP3, get_gp3_path +from .tm import Turbomole, get_turbomole_path __all__ = [ "XTB", @@ -15,4 +16,6 @@ "get_orca_path", "GP3", "get_gp3_path", + "Turbomole", + "get_turbomole_path", ] diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py new file mode 100644 index 0000000..f6d43bf --- /dev/null +++ b/src/mindlessgen/qm/tm.py @@ -0,0 +1,239 @@ +""" +This module handles all Turbomole-related functionality. +""" + +from pathlib import Path +import shutil +import subprocess as sp +from tempfile import TemporaryDirectory + +from ..molecules import Molecule +from ..prog import TURBOMOLEConfig as turbomoleConfig +from .base import QMMethod + + +class Turbomole(QMMethod): + """ + This class handles all interaction with the turbomole external dependency. + """ + + def __init__(self, path: str | Path, turbomolecfg: turbomoleConfig) -> None: + """ + Initialize the turbomole class. + """ + if isinstance(path, str): + self.path: Path = Path(path).resolve() + elif isinstance(path, Path): + self.path = path + else: + raise TypeError("turbomole_path should be a string or a Path object.") + self.cfg = turbomolecfg + + def optimize( + self, molecule: Molecule, max_cycles: int | None = None, verbosity: int = 1 + ) -> Molecule: + """ + Optimize a molecule using turbomole. + """ + + # Create a unique temporary directory using TemporaryDirectory context manager + with TemporaryDirectory(prefix="turbomole_") as temp_dir: + temp_path = Path(temp_dir).resolve() + # write the molecule to a temporary file + molecule.write_xyz_to_file(temp_path / "molecule.xyz") + + inputname = "turbomole_opt.inp" + turbomole_input = self._gen_input( + molecule, "molecule.xyz", True, max_cycles + ) + if verbosity > 1: + print("turbomole input file:\n##################") + print(turbomole_input) + print("##################") + with open(temp_path / inputname, "w", encoding="utf8") as f: + f.write(turbomole_input) + + # run turbomole + arguments = [ + inputname, + ] + + turbomole_log_out, turbomole_log_err, return_code = self._run( + temp_path=temp_path, arguments=arguments + ) + if verbosity > 2: + print(turbomole_log_out) + if return_code != 0: + raise RuntimeError( + f"turbomole failed with return code {return_code}:\n{turbomole_log_err}" + ) + + # read the optimized molecule from the output file + xyzfile = Path(temp_path / inputname).resolve().with_suffix(".xyz") + optimized_molecule = molecule.copy() + optimized_molecule.read_xyz_from_file(xyzfile) + return optimized_molecule + + def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: + """ + Perform a single point calculation using turbomole. + """ + # Create a unique temporary directory using TemporaryDirectory context manager + with TemporaryDirectory(prefix="turbomole_") as temp_dir: + temp_path = Path(temp_dir).resolve() + # write the molecule to a temporary file + molfile = "mol.xyz" + molecule.write_xyz_to_file(temp_path / molfile) + + # converte mol.xyz to tm format + arguments = [ + "x2t mol.xyz > coord", + ] + print("Running", arguments) + + # run cefine to write control file + arguments = [ + "cefine", + "-bas", + f"{self.cfg.basis}", + "-func", + f"{self.cfg.functional}", + ] + if molecule.charge != 0: + arguments += ["-chrg", str(molecule.charge)] + if molecule.uhf != 0: + arguments += ["-uhf", str(molecule.uhf)] + # if molecule.symmetry is not None: + # arguments += ["-sym", str(molecule.symmetry)] + print("Running cefine", arguments) + + # run turbomole + arguments = [ + "ridft > rifdt.out", + ] + turbomole_log_out, turbomole_log_err, return_code = self._run( + temp_path=temp_path, arguments=arguments + ) + if verbosity > 2: + print(turbomole_log_out) + if return_code != 0: + raise RuntimeError( + f"turbomole failed with return code {return_code}:\n{turbomole_log_err}" + ) + + return turbomole_log_out + + def check_gap( + self, molecule: Molecule, threshold: float, verbosity: int = 1 + ) -> bool: + """ + Check if the HL gap is larger than a given threshold. + """ + raise NotImplementedError("check_gap not implemented for turbomole.") + + def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: + """ + Run turbomole with the given arguments. + + Arguments: + arguments (list[str]): The arguments to pass to turbomole. + + Returns: + tuple[str, str, int]: The output of the turbomole calculation (stdout and stderr) + and the return code + """ + try: + turbomole_out = sp.run( + [str(self.path)] + arguments, + cwd=temp_path, + capture_output=True, + check=True, + ) + # get the output of the turbomole calculation (of both stdout and stderr) + turbomole_log_out = turbomole_out.stdout.decode("utf8", errors="replace") + turbomole_log_err = turbomole_out.stderr.decode("utf8", errors="replace") + # check if the output contains "turbomole TERMINATED NORMALLY" + if "all done" not in turbomole_log_out: + raise sp.CalledProcessError( + 1, + str(self.path), + turbomole_log_out.encode("utf8"), + turbomole_log_err.encode("utf8"), + ) + return turbomole_log_out, turbomole_log_err, 0 + except sp.CalledProcessError as e: + turbomole_log_out = e.stdout.decode("utf8", errors="replace") + turbomole_log_err = e.stderr.decode("utf8", errors="replace") + return turbomole_log_out, turbomole_log_err, e.returncode + + def _cefine(self, molecule: Molecule) -> list[str]: + """ + Refine a molecule using turbomole. + """ + call = [ + "cefine", + "-bas", + f"{self.cfg.basis}", + "-func", + f"{self.cfg.functional}", + ] + if molecule.charge != 0: + call += ["-chrg", str(molecule.charge)] + if molecule.uhf != 0: + call += ["-uhf", str(molecule.uhf)] + # if molecule.symmetry is not None: + # arguments += ["-sym", str(molecule.symmetry)] + + return call + + def _gen_input( + self, + molecule: Molecule, + xyzfile: str, + optimization: bool = False, + opt_cycles: int | None = None, + ) -> str: + """ + Generate a default input file for turbomole. + """ + turbomole_input = f"! {self.cfg.functional} {self.cfg.basis}\n" + # turbomole_input += f"! DEFGRID{self.cfg.gridsize}\n" + turbomole_input += "! NoTRAH NoSOSCF SlowConv\n" + # "! AutoAux" keyword for super-heavy elements as def2/J ends at Rn + if any(atom >= 86 for atom in molecule.ati): + turbomole_input += "! AutoAux\n" + if optimization: + turbomole_input += "! OPT\n" + if opt_cycles is not None: + turbomole_input += f"%geom MaxIter {opt_cycles} end\n" + turbomole_input += ( + f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n\tConvergence Medium\nend\n" + ) + turbomole_input += "%pal nprocs 1 end\n\n" + turbomole_input += f"* xyzfile {molecule.charge} {molecule.uhf + 1} {xyzfile}\n" + return turbomole_input + + +# TODO: 1. Convert this to a @staticmethod of Class turbomole +# 2. Rename to `get_method` or similar to enable an abstract interface +# 3. Add the renamed method to the ABC `QMMethod` +# 4. In `main.py`: Remove the passing of the path finder functions as arguments +# and remove the boiler plate code to make it more general. +def get_turbomole_path(binary_name: str | Path | None = None) -> Path: + """ + Get the path to the turbomole binary based on different possible names + that are searched for in the PATH. + """ + default_turbomole_names: list[str | Path] = ["turbomole", "turbomole_dev"] + # put binary name at the beginning of the lixt to prioritize it + if binary_name is not None: + binary_names = [binary_name] + default_turbomole_names + else: + binary_names = default_turbomole_names + # Get turbomole path from 'which turbomole' command + for binpath in binary_names: + which_turbomole = shutil.which(binpath) + if which_turbomole: + turbomole_path = Path(which_turbomole).resolve() + return turbomole_path + raise ImportError("'turbomole' binary could not be found.") From d6c60c28fd5919144b79f44adf9e1de7ef932552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Wed, 8 Jan 2025 12:54:49 +0100 Subject: [PATCH 02/14] support for turbomole MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- CHANGELOG.md | 1 + mindlessgen.toml | 4 +- src/mindlessgen/generator/main.py | 7 +- src/mindlessgen/prog/config.py | 2 +- src/mindlessgen/qm/tm.py | 230 +++++++++++++++++------------- 5 files changed, 140 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ac4cf3..822da61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - support for `python-3.13` - option to set a fixed molecular charge, while ensuring `uhf = 0` - `element_composition` and `forbidden_elements` can now be directly set to a `dict` or `list`, respectively, via API access +- support for the qm program turbomole. ### Breaking Changes - Removal of the `dist_threshold` flag and in the `-toml` file. diff --git a/mindlessgen.toml b/mindlessgen.toml index f0b1761..4b6c9c4 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -90,9 +90,11 @@ gridsize = 1 scf_cycles = 100 [turbomole] -# > Path to the turbomole executable. The names `turbomole` and `turbomole_dev` are automatically searched for. Options: +# > Path to the turbomole executable. The names `ridft` and `jobex` are automatically searched for. Options: turbomole_path = "/path/to/turbomole" # > Functional/Method: Options: functional = "PBE" # > Basis set: Options: basis = "def2-SVP" +# > Maximum number of SCF cycles: Options: +scf_cycles = 100 diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index c4a953c..f80d2c4 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -17,7 +17,6 @@ ORCA, get_orca_path, GP3, - get_gp3_path, Turbomole, get_turbomole_path, ) @@ -61,7 +60,11 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: if config.general.postprocess: postprocess_engine: QMMethod | None = setup_engines( - config.postprocess.engine, config, get_xtb_path, get_orca_path, get_gp3_path + config.postprocess.engine, + config, + get_xtb_path, + get_orca_path, + get_turbomole_path, ) else: postprocess_engine = None diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 591fbb0..f657bda 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -902,7 +902,7 @@ class TURBOMOLEConfig(BaseConfig): def __init__(self: TURBOMOLEConfig) -> None: self._turbomole_path: str | Path = "turbomole" - self._functional: str = "PBE" + self._functional: str = "pbe" self._basis: str = "def2-SVP" self._scf_cycles: int = 100 diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index f6d43bf..fc8a766 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -4,6 +4,7 @@ from pathlib import Path import shutil +import os import subprocess as sp from tempfile import TemporaryDirectory @@ -30,53 +31,84 @@ def __init__(self, path: str | Path, turbomolecfg: turbomoleConfig) -> None: self.cfg = turbomolecfg def optimize( - self, molecule: Molecule, max_cycles: int | None = None, verbosity: int = 1 + self, + molecule: Molecule, + max_cycles: int | None = None, + verbosity: int = 1, ) -> Molecule: """ - Optimize a molecule using turbomole. + Optimize a molecule using ORCA. """ # Create a unique temporary directory using TemporaryDirectory context manager with TemporaryDirectory(prefix="turbomole_") as temp_dir: temp_path = Path(temp_dir).resolve() # write the molecule to a temporary file - molecule.write_xyz_to_file(temp_path / "molecule.xyz") + molfile = "molecule.xyz" + molecule.write_xyz_to_file(temp_path / molfile) - inputname = "turbomole_opt.inp" - turbomole_input = self._gen_input( - molecule, "molecule.xyz", True, max_cycles - ) + # convert molfile to coord file (tm format) + command = f"x2t {temp_path / molfile} > {temp_path / 'coord'}" + + try: + # run the command in a shell + sp.run(command, shell=True, check=True) + except sp.CalledProcessError as e: + print(f"The xyz file could not be converted to a coord file: {e}") + + if verbosity > 2: + with open(temp_path / "coord", encoding="utf8") as f: + tm_coordinates = f.read() + print(tm_coordinates) + + # write the input file + inputname = "control" + tm_input = self._gen_input(molecule) if verbosity > 1: - print("turbomole input file:\n##################") - print(turbomole_input) + print("Turbomole input file:\n##################") + print(tm_input) print("##################") with open(temp_path / inputname, "w", encoding="utf8") as f: - f.write(turbomole_input) + f.write(tm_input) - # run turbomole + # Setup the turbomole optimization command including the max number of optimization cycles arguments = [ - inputname, + "jobex", + "-ri", + "-c", + f"{max_cycles}", + ">", + "jobex.out", ] - turbomole_log_out, turbomole_log_err, return_code = self._run( + if verbosity > 2: + print(f"Running command: {' '.join(arguments)}") + + tm_log_out, tm_log_err, return_code = self._run( temp_path=temp_path, arguments=arguments ) if verbosity > 2: - print(turbomole_log_out) + print(tm_log_out) if return_code != 0: raise RuntimeError( - f"turbomole failed with return code {return_code}:\n{turbomole_log_err}" + f"Turbomole failed with return code {return_code}:\n{tm_log_err}" ) + # revert the coord file to xyz file + revert_command = f"t2x {temp_path / 'coord'} > {temp_path / 'molecule.xyz'}" + try: + sp.run(revert_command, shell=True, check=True) + except sp.CalledProcessError as e: + print(f"The coord file could not be converted to a xyz file: {e}") # read the optimized molecule from the output file - xyzfile = Path(temp_path / inputname).resolve().with_suffix(".xyz") + xyzfile = Path(temp_path / "molecule.xyz").resolve().with_suffix(".xyz") optimized_molecule = molecule.copy() optimized_molecule.read_xyz_from_file(xyzfile) return optimized_molecule def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: """ - Perform a single point calculation using turbomole. + Perform a single point calculation using Turbomole. """ # Create a unique temporary directory using TemporaryDirectory context manager with TemporaryDirectory(prefix="turbomole_") as temp_dir: @@ -84,44 +116,44 @@ def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: # write the molecule to a temporary file molfile = "mol.xyz" molecule.write_xyz_to_file(temp_path / molfile) + print(temp_path / molfile) - # converte mol.xyz to tm format - arguments = [ - "x2t mol.xyz > coord", - ] - print("Running", arguments) + # convert molfile to coord file + command = f"x2t {temp_path / molfile} > {temp_path / 'coord'}" - # run cefine to write control file - arguments = [ - "cefine", - "-bas", - f"{self.cfg.basis}", - "-func", - f"{self.cfg.functional}", - ] - if molecule.charge != 0: - arguments += ["-chrg", str(molecule.charge)] - if molecule.uhf != 0: - arguments += ["-uhf", str(molecule.uhf)] - # if molecule.symmetry is not None: - # arguments += ["-sym", str(molecule.symmetry)] - print("Running cefine", arguments) - - # run turbomole - arguments = [ - "ridft > rifdt.out", - ] - turbomole_log_out, turbomole_log_err, return_code = self._run( - temp_path=temp_path, arguments=arguments + try: + # run the command in a shell + sp.run(command, shell=True, check=True) + except sp.CalledProcessError as e: + print(f"The xyz file could not be converted to a coord file: {e}") + with open(temp_path / "coord", encoding="utf8") as f: + content = f.read() + print(content) + + # write the input file + inputname = "control" + tm_input = self._gen_input(molecule) + if verbosity > 1: + print("Turbomole input file:\n##################") + print(self._gen_input(molecule)) + print("##################") + with open(temp_path / inputname, "w", encoding="utf8") as f: + f.write(tm_input) + + # set up the turbomole single point calculation command + run_tm = ["ridft"] + + tm_log_out, tm_log_err, return_code = self._run( + temp_path=temp_path, arguments=run_tm ) if verbosity > 2: - print(turbomole_log_out) + print(tm_log_out) if return_code != 0: raise RuntimeError( - f"turbomole failed with return code {return_code}:\n{turbomole_log_err}" + f"Turbomole failed with return code {return_code}:\n{tm_log_err}" ) - return turbomole_log_out + return tm_log_out def check_gap( self, molecule: Molecule, threshold: float, verbosity: int = 1 @@ -142,76 +174,74 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: tuple[str, str, int]: The output of the turbomole calculation (stdout and stderr) and the return code """ + try: - turbomole_out = sp.run( - [str(self.path)] + arguments, + # Change the enviroment variable to run the calculation on one cpu + original_threads = os.environ.get("PARNODES") + os.environ["PARNODES"] = "1" + + print(f"Temporäre Einstellung von PARNODES: {os.environ['PARNODES']}") + + sp.run( + arguments, cwd=temp_path, capture_output=True, check=True, + env={"PARNODES": "1", **os.environ}, ) - # get the output of the turbomole calculation (of both stdout and stderr) - turbomole_log_out = turbomole_out.stdout.decode("utf8", errors="replace") - turbomole_log_err = turbomole_out.stderr.decode("utf8", errors="replace") - # check if the output contains "turbomole TERMINATED NORMALLY" - if "all done" not in turbomole_log_out: + + # Read the job-last file to get the output of the calculation + output_file = temp_path / "job.last" + if output_file.exists(): + with open(output_file, encoding="utf-8") as file: + file_content = file.read() + turbomole_log_out = file_content + turbomole_log_err = "" + else: + raise FileNotFoundError(f"Output file {output_file} not found.") + + if "ridft : all done" not in turbomole_log_out: raise sp.CalledProcessError( 1, - str(self.path), + str(output_file), turbomole_log_out.encode("utf8"), turbomole_log_err.encode("utf8"), ) + # Revert the enviroment variable to the original setting + if original_threads is None: + os.environ.pop("PARNODES", None) + else: + os.environ["PARNODES"] = original_threads + + print( + f"Zurückgesetzte Einstellung von PARNODES: {os.environ.get('PARNODES', 'nicht gesetzt')}" + ) return turbomole_log_out, turbomole_log_err, 0 except sp.CalledProcessError as e: turbomole_log_out = e.stdout.decode("utf8", errors="replace") turbomole_log_err = e.stderr.decode("utf8", errors="replace") return turbomole_log_out, turbomole_log_err, e.returncode - def _cefine(self, molecule: Molecule) -> list[str]: - """ - Refine a molecule using turbomole. - """ - call = [ - "cefine", - "-bas", - f"{self.cfg.basis}", - "-func", - f"{self.cfg.functional}", - ] - if molecule.charge != 0: - call += ["-chrg", str(molecule.charge)] - if molecule.uhf != 0: - call += ["-uhf", str(molecule.uhf)] - # if molecule.symmetry is not None: - # arguments += ["-sym", str(molecule.symmetry)] - - return call - def _gen_input( self, molecule: Molecule, - xyzfile: str, - optimization: bool = False, - opt_cycles: int | None = None, ) -> str: """ - Generate a default input file for turbomole. + Generate a default input file for Turbomole. """ - turbomole_input = f"! {self.cfg.functional} {self.cfg.basis}\n" - # turbomole_input += f"! DEFGRID{self.cfg.gridsize}\n" - turbomole_input += "! NoTRAH NoSOSCF SlowConv\n" - # "! AutoAux" keyword for super-heavy elements as def2/J ends at Rn - if any(atom >= 86 for atom in molecule.ati): - turbomole_input += "! AutoAux\n" - if optimization: - turbomole_input += "! OPT\n" - if opt_cycles is not None: - turbomole_input += f"%geom MaxIter {opt_cycles} end\n" - turbomole_input += ( - f"%scf\n\tMaxIter {self.cfg.scf_cycles}\n\tConvergence Medium\nend\n" - ) - turbomole_input += "%pal nprocs 1 end\n\n" - turbomole_input += f"* xyzfile {molecule.charge} {molecule.uhf + 1} {xyzfile}\n" - return turbomole_input + tm_input = "$coord file=coord\n" + tm_input += f"$charge={molecule.charge} unpaired={molecule.uhf}\n" + tm_input += "$symmetry c1\n" + tm_input += "$atoms\n" + tm_input += f" basis={self.cfg.basis}\n" + tm_input += "$dft\n" + tm_input += f" functional {self.cfg.functional}\n" + tm_input += "$rij\n" + tm_input += f"$scfiterlimit {self.cfg.scf_cycles}\n" + tm_input += "$energy file=energy\n" + tm_input += "$grad file=gradient\n" + tm_input += "$end" + return tm_input # TODO: 1. Convert this to a @staticmethod of Class turbomole @@ -224,7 +254,7 @@ def get_turbomole_path(binary_name: str | Path | None = None) -> Path: Get the path to the turbomole binary based on different possible names that are searched for in the PATH. """ - default_turbomole_names: list[str | Path] = ["turbomole", "turbomole_dev"] + default_turbomole_names: list[str | Path] = ["ridft", "jobex"] # put binary name at the beginning of the lixt to prioritize it if binary_name is not None: binary_names = [binary_name] + default_turbomole_names @@ -232,8 +262,8 @@ def get_turbomole_path(binary_name: str | Path | None = None) -> Path: binary_names = default_turbomole_names # Get turbomole path from 'which turbomole' command for binpath in binary_names: - which_turbomole = shutil.which(binpath) - if which_turbomole: - turbomole_path = Path(which_turbomole).resolve() - return turbomole_path + which_ridft = shutil.which(binpath) + if which_ridft: + ridft_path = Path(which_ridft).resolve() + return ridft_path raise ImportError("'turbomole' binary could not be found.") From 0a20340cc1178b12cd0f983575c347afe3707552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Wed, 8 Jan 2025 14:27:18 +0100 Subject: [PATCH 03/14] corrected some typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- src/mindlessgen/prog/config.py | 2 +- src/mindlessgen/qm/tm.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index f657bda..0e549fd 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -694,7 +694,7 @@ def engine(self, engine: str): if not isinstance(engine, str): raise TypeError("Postprocess engine should be a string.") if engine not in ["xtb", "orca", "gp3", "turbomole"]: - raise ValueError("Postprocess engine can only be xtb, orca turbomole.") + raise ValueError("Postprocess engine can only be xtb, orca or turbomole.") self._engine = engine @property diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index fc8a766..0d19305 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -180,7 +180,7 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: original_threads = os.environ.get("PARNODES") os.environ["PARNODES"] = "1" - print(f"Temporäre Einstellung von PARNODES: {os.environ['PARNODES']}") + print(f"Temporal setting for PARNODES: {os.environ['PARNODES']}") sp.run( arguments, @@ -214,7 +214,7 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: os.environ["PARNODES"] = original_threads print( - f"Zurückgesetzte Einstellung von PARNODES: {os.environ.get('PARNODES', 'nicht gesetzt')}" + f"Revert the settings for PARNODES: {os.environ.get('PARNODES', 'not set')}" ) return turbomole_log_out, turbomole_log_err, 0 except sp.CalledProcessError as e: @@ -260,7 +260,7 @@ def get_turbomole_path(binary_name: str | Path | None = None) -> Path: binary_names = [binary_name] + default_turbomole_names else: binary_names = default_turbomole_names - # Get turbomole path from 'which turbomole' command + # Get turbomole path from 'which ridft' command for binpath in binary_names: which_ridft = shutil.which(binpath) if which_ridft: From 8bf6d513b24cef6f9beacca7c05ccac04ad85580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Fri, 10 Jan 2025 14:45:33 +0100 Subject: [PATCH 04/14] changed the PARNODES command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- src/mindlessgen/prog/config.py | 1 + src/mindlessgen/qm/tm.py | 51 +++++++++++----------------------- 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 0e549fd..c0cf2f4 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -973,6 +973,7 @@ def scf_cycles(self, max_scf_cycles: int): raise TypeError("Max SCF cycles should be an integer.") if max_scf_cycles < 1: raise ValueError("Max SCF cycles should be greater than 0.") + self._scf_cycles = max_scf_cycles class ConfigManager: diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index 0d19305..71148d8 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -4,7 +4,6 @@ from pathlib import Path import shutil -import os import subprocess as sp from tempfile import TemporaryDirectory @@ -72,14 +71,7 @@ def optimize( f.write(tm_input) # Setup the turbomole optimization command including the max number of optimization cycles - arguments = [ - "jobex", - "-ri", - "-c", - f"{max_cycles}", - ">", - "jobex.out", - ] + arguments = [f"PARNODES=1 jobex -ri -c {max_cycles} > jobex.out"] if verbosity > 2: print(f"Running command: {' '.join(arguments)}") @@ -141,7 +133,7 @@ def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: f.write(tm_input) # set up the turbomole single point calculation command - run_tm = ["ridft"] + run_tm = ["PARNODES=1 ridft > ridft.out"] tm_log_out, tm_log_err, return_code = self._run( temp_path=temp_path, arguments=run_tm @@ -174,31 +166,29 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: tuple[str, str, int]: The output of the turbomole calculation (stdout and stderr) and the return code """ - try: - # Change the enviroment variable to run the calculation on one cpu - original_threads = os.environ.get("PARNODES") - os.environ["PARNODES"] = "1" - - print(f"Temporal setting for PARNODES: {os.environ['PARNODES']}") - sp.run( arguments, cwd=temp_path, capture_output=True, check=True, - env={"PARNODES": "1", **os.environ}, + shell=True, ) - - # Read the job-last file to get the output of the calculation - output_file = temp_path / "job.last" - if output_file.exists(): - with open(output_file, encoding="utf-8") as file: - file_content = file.read() - turbomole_log_out = file_content + if "PARNODES=1 ridft > ridft.out" in arguments[0]: + with open(temp_path / "ridft.out", encoding="utf-8") as file: + ridft_file = file.read() + turbomole_log_out = ridft_file turbomole_log_err = "" else: - raise FileNotFoundError(f"Output file {output_file} not found.") + # Read the job-last file to get the output of the calculation + output_file = temp_path / "job.last" + if output_file.exists(): + with open(output_file, encoding="utf-8") as file: + file_content = file.read() + turbomole_log_out = file_content + turbomole_log_err = "" + else: + raise FileNotFoundError(f"Output file {output_file} not found.") if "ridft : all done" not in turbomole_log_out: raise sp.CalledProcessError( @@ -207,15 +197,6 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: turbomole_log_out.encode("utf8"), turbomole_log_err.encode("utf8"), ) - # Revert the enviroment variable to the original setting - if original_threads is None: - os.environ.pop("PARNODES", None) - else: - os.environ["PARNODES"] = original_threads - - print( - f"Revert the settings for PARNODES: {os.environ.get('PARNODES', 'not set')}" - ) return turbomole_log_out, turbomole_log_err, 0 except sp.CalledProcessError as e: turbomole_log_out = e.stdout.decode("utf8", errors="replace") From b8b683f767b3d006414c4fc336d72370cdacff60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Tue, 14 Jan 2025 11:17:09 +0100 Subject: [PATCH 05/14] coord handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- CHANGELOG.md | 2 +- src/mindlessgen/molecules/molecule.py | 158 +++++++++++++++++++++++++- src/mindlessgen/qm/tm.py | 45 ++++---- 3 files changed, 180 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e23df0..8ae876b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - support for `python-3.13` - option to set a fixed molecular charge, while ensuring `uhf = 0` - `element_composition` and `forbidden_elements` can now be directly set to a `dict` or `list`, respectively, via API access -- support for the qm program turbomole. +- support for TURBOMOLE as QM engine. ### Breaking Changes - Removal of the `dist_threshold` flag and in the `-toml` file. diff --git a/src/mindlessgen/molecules/molecule.py b/src/mindlessgen/molecules/molecule.py index 4928a5b..6717c06 100644 --- a/src/mindlessgen/molecules/molecule.py +++ b/src/mindlessgen/molecules/molecule.py @@ -134,6 +134,11 @@ PSE_NUMBERS: dict[str, int] = {k.lower(): v for v, k in PSE.items()} PSE_SYMBOLS: dict[int, str] = {v: k.lower() for v, k in PSE.items()} +BOHR2AA = ( + 0.529177210544 # taken from https://physics.nist.gov/cgi-bin/cuu/Value?bohrrada0 +) +AA2BOHR = 1 / BOHR2AA + class Molecule: """ @@ -264,6 +269,43 @@ def read_mol_from_file(file: str | Path) -> Molecule: molecule.name = file_path.stem return molecule + @staticmethod + def read_mol_from_coord(file: str | Path) -> Molecule: + """ + Read the XYZ coordinates and the charge of the molecule from a 'coord' file. + Thereby, generate a completely new molecule object from scratch. + + Can be called like this: + from molecule import Molecule + # Call the static method using the class name + coord_file = "coord" + molecule_instance = Molecule.read_mol_from_coord(coord_file) + # Now you can use the molecule_instance as needed + print(molecule_instance.name) + + :param file: The 'coord' file to read from. + :return: A new instance of Molecule with the read data. + """ + molecule = Molecule() + if isinstance(file, str): + file_path = Path(file).resolve() + elif isinstance(file, Path): + file_path = file.resolve() + else: + raise TypeError("String or Path expected.") + molecule.read_xyz_from_coord(file_path) + uhf_path = file_path.parent / ".UHF" + if uhf_path.exists(): + molecule.read_uhf_from_file(uhf_path) + else: + molecule.uhf = 0 + chrg_path = file_path.parent / ".CHRG" + if chrg_path.exists(): + molecule.read_charge_from_file(chrg_path) + else: + molecule.charge = 0 + return molecule + @property def name(self) -> str: """ @@ -480,7 +522,7 @@ def get_xyz_str(self) -> str: xyz_str += commentline for i in range(self.num_atoms): xyz_str += ( - f"{PSE[self.ati[i]+1]:<5} " + f"{PSE[self.ati[i] + 1]:<5} " + f"{self.xyz[i, 0]:>12.7f} " + f"{self.xyz[i, 1]:>12.7f} " + f"{self.xyz[i, 2]:>12.7f}\n" @@ -558,6 +600,120 @@ def read_xyz_from_file(self, filename: str | Path) -> None: self.xyz[i, 2] = float(line[3]) self.atlist[self.ati[i]] += 1 + def get_coord_str(self) -> str: + """ + Obtain a string with the full 'coord' file information of the molecule. + """ + coord_str = "$coord\n" + for i in range(self.num_atoms): + coord_str += ( + f"{self.xyz[i, 0] / BOHR2AA:>20.14f} " + + f"{self.xyz[i, 1] / BOHR2AA:>20.14f} " + + f"{self.xyz[i, 2] / BOHR2AA:>20.14f} " + + f"{PSE[self.ati[i] + 1]}\n" + ) + coord_str += "$end\n" + return coord_str + + def write_coord_to_file(self, filename: str | Path | None = None): + """ + Write the 'coord' file of the molecule to a file. + + The layout of the file is as follows: + ``` + $coord + + + ... + $end + ``` + + :param filename: The name of the file to write to. + """ + # raise an error if the number of atoms is not set + if self._num_atoms is None: + raise ValueError("Number of atoms not set.") + if not self._ati.size: + raise ValueError("Atomic numbers not set.") + if not self._xyz.size: + raise ValueError("Atomic coordinates not set.") + + if filename: + if not isinstance(filename, Path): + filename = Path(filename).resolve() + else: + filename = Path("mlm_" + self.name + ".coord").resolve() + + with open(filename, "w", encoding="utf8") as f: + f.write(self.get_coord_str()) + # if the charge is set, write it to a '.CHRG' file + if self._charge is not None and self._charge != 0: + chrg_filename = filename.parent / ".CHRG" + with open(chrg_filename, "w", encoding="utf8") as f: + f.write(f"{self.charge}\n") + # if the UHF is set, write it to a '.UHF' file + if self._uhf is not None and self._uhf > 0: + uhf_filename = filename.parent / ".UHF" + with open(uhf_filename, "w", encoding="utf8") as f: + f.write(f"{self.uhf}\n") + + def read_xyz_from_coord(self, filename: str | Path) -> None: + """ + Read the XYZ coordinates of the molecule from a 'coord' file. + + The layout of the file is as follows: + ``` + $coord + 0.00000000000000 0.00000000000000 3.60590687077610 u + 0.00000000000000 0.00000000000000 -3.60590687077610 u + 0.00000000000000 2.74684941070244 0.00000000000000 F + 3.72662552762076 0.00000000000000 5.36334486193405 F + -3.72662552762076 0.00000000000000 5.36334486193405 F + 3.72662552762076 0.00000000000000 -5.36334486193405 F + 0.00000000000000 -2.74684941070244 0.00000000000000 F + -3.72662552762076 0.00000000000000 -5.36334486193405 F + $end + ... or ... + $coord + 0.00000000000000 0.00000000000000 2.30704866281983 u + 0.00000000000000 0.00000000000000 -2.30704866281983 u + $redundant + number_of_atoms 2 + degrees_of_freedom 1 + internal_coordinates 1 + frozen_coordinates 0 + # definitions of redundant internals + 1 k 1.0000000000000 stre 1 2 val= 4.61410 + 1 non zero eigenvalues of BmBt + 1 2.000000000 1 0 + 1 + $end + + :param filename: The name of the file to read from. + """ + with open(filename, encoding="utf8") as f: + lines = f.readlines() + # number of atoms + num_atoms = 0 + for line in lines: + if line.startswith("$coord"): + continue + if line.startswith("$end") or line.startswith("$redundant"): + break + num_atoms += 1 + self.num_atoms = num_atoms + # read the atomic coordinates + self.xyz = np.zeros((self.num_atoms, 3)) + self.ati = np.zeros(self.num_atoms, dtype=int) + self.atlist = np.zeros(103, dtype=int) + for i in range(self.num_atoms): + line_entries = lines[i + 1].split() + self.xyz[i, 0] = float(line_entries[0]) * BOHR2AA + self.xyz[i, 1] = float(line_entries[1]) * BOHR2AA + self.xyz[i, 2] = float(line_entries[2]) * BOHR2AA + self.ati[i] = PSE_NUMBERS[line_entries[3].lower()] - 1 + self.atlist[self.ati[i]] += 1 + def read_charge_from_file(self, filename: str | Path): """ Read the charge of the molecule from a file. diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index 71148d8..6c3be1b 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -8,7 +8,7 @@ from tempfile import TemporaryDirectory from ..molecules import Molecule -from ..prog import TURBOMOLEConfig as turbomoleConfig +from ..prog import TURBOMOLEConfig from .base import QMMethod @@ -17,7 +17,7 @@ class Turbomole(QMMethod): This class handles all interaction with the turbomole external dependency. """ - def __init__(self, path: str | Path, turbomolecfg: turbomoleConfig) -> None: + def __init__(self, path: str | Path, turbomolecfg: TURBOMOLEConfig) -> None: """ Initialize the turbomole class. """ @@ -43,22 +43,20 @@ def optimize( with TemporaryDirectory(prefix="turbomole_") as temp_dir: temp_path = Path(temp_dir).resolve() # write the molecule to a temporary file - molfile = "molecule.xyz" - molecule.write_xyz_to_file(temp_path / molfile) + molecule.write_coord_to_file(temp_path / "coord") - # convert molfile to coord file (tm format) - command = f"x2t {temp_path / molfile} > {temp_path / 'coord'}" + # # convert molfile to coord file (tm format) + # command = f"x2t {temp_path / molfile} > {temp_path / 'coord'}" - try: - # run the command in a shell - sp.run(command, shell=True, check=True) - except sp.CalledProcessError as e: - print(f"The xyz file could not be converted to a coord file: {e}") + # try: + # # run the command in a shell + # sp.run(command, shell=True, check=True) + # except sp.CalledProcessError as e: + # print(f"The xyz file could not be converted to a coord file: {e}") - if verbosity > 2: - with open(temp_path / "coord", encoding="utf8") as f: - tm_coordinates = f.read() - print(tm_coordinates) + with open(temp_path / "coord", encoding="utf8") as f: + coord_content = f.read() + print(coord_content) # write the input file inputname = "control" @@ -86,16 +84,17 @@ def optimize( f"Turbomole failed with return code {return_code}:\n{tm_log_err}" ) - # revert the coord file to xyz file - revert_command = f"t2x {temp_path / 'coord'} > {temp_path / 'molecule.xyz'}" - try: - sp.run(revert_command, shell=True, check=True) - except sp.CalledProcessError as e: - print(f"The coord file could not be converted to a xyz file: {e}") + # # revert the coord file to xyz file + + # revert_command = f"t2x {temp_path / 'coord'} > {temp_path / 'molecule.xyz'}" + # try: + # sp.run(revert_command, shell=True, check=True) + # except sp.CalledProcessError as e: + # print(f"The coord file could not be converted to a xyz file: {e}") # read the optimized molecule from the output file - xyzfile = Path(temp_path / "molecule.xyz").resolve().with_suffix(".xyz") + xyzfile = Path(temp_path / "coord").resolve() optimized_molecule = molecule.copy() - optimized_molecule.read_xyz_from_file(xyzfile) + optimized_molecule.read_xyz_from_coord(xyzfile) return optimized_molecule def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: From 38484aaec8751ddeee7452d14555ec4fecfe8021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Tue, 14 Jan 2025 16:58:33 +0100 Subject: [PATCH 06/14] requestet changes are build in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- src/mindlessgen/molecules/molecule.py | 6 +- src/mindlessgen/prog/config.py | 3 - src/mindlessgen/qm/tm.py | 80 +++++++++------------------ 3 files changed, 32 insertions(+), 57 deletions(-) diff --git a/src/mindlessgen/molecules/molecule.py b/src/mindlessgen/molecules/molecule.py index 6717c06..a3187c6 100644 --- a/src/mindlessgen/molecules/molecule.py +++ b/src/mindlessgen/molecules/molecule.py @@ -698,7 +698,11 @@ def read_xyz_from_coord(self, filename: str | Path) -> None: for line in lines: if line.startswith("$coord"): continue - if line.startswith("$end") or line.startswith("$redundant"): + if ( + line.startswith("$end") + or line.startswith("$redundant") + or line.startswith("$user-defined") + ): break num_atoms += 1 self.num_atoms = num_atoms diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index be0a1bf..468aa37 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -1182,9 +1182,6 @@ def load_from_dict(self, config_dict: dict) -> None: "orca": { "orca_option": "opt" }, - "turbomole": { - "turbomole_option": "opt" - } } Arguments: diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index 6c3be1b..eee939d 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -45,19 +45,6 @@ def optimize( # write the molecule to a temporary file molecule.write_coord_to_file(temp_path / "coord") - # # convert molfile to coord file (tm format) - # command = f"x2t {temp_path / molfile} > {temp_path / 'coord'}" - - # try: - # # run the command in a shell - # sp.run(command, shell=True, check=True) - # except sp.CalledProcessError as e: - # print(f"The xyz file could not be converted to a coord file: {e}") - - with open(temp_path / "coord", encoding="utf8") as f: - coord_content = f.read() - print(coord_content) - # write the input file inputname = "control" tm_input = self._gen_input(molecule) @@ -69,8 +56,7 @@ def optimize( f.write(tm_input) # Setup the turbomole optimization command including the max number of optimization cycles - arguments = [f"PARNODES=1 jobex -ri -c {max_cycles} > jobex.out"] - + arguments = [f"PARNODES=1 jobex -ri -c {max_cycles}"] if verbosity > 2: print(f"Running command: {' '.join(arguments)}") @@ -85,13 +71,6 @@ def optimize( ) # # revert the coord file to xyz file - - # revert_command = f"t2x {temp_path / 'coord'} > {temp_path / 'molecule.xyz'}" - # try: - # sp.run(revert_command, shell=True, check=True) - # except sp.CalledProcessError as e: - # print(f"The coord file could not be converted to a xyz file: {e}") - # read the optimized molecule from the output file xyzfile = Path(temp_path / "coord").resolve() optimized_molecule = molecule.copy() optimized_molecule.read_xyz_from_coord(xyzfile) @@ -105,22 +84,10 @@ def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: with TemporaryDirectory(prefix="turbomole_") as temp_dir: temp_path = Path(temp_dir).resolve() # write the molecule to a temporary file - molfile = "mol.xyz" - molecule.write_xyz_to_file(temp_path / molfile) + molfile = "coord" + molecule.write_coord_to_file(temp_path / molfile) print(temp_path / molfile) - # convert molfile to coord file - command = f"x2t {temp_path / molfile} > {temp_path / 'coord'}" - - try: - # run the command in a shell - sp.run(command, shell=True, check=True) - except sp.CalledProcessError as e: - print(f"The xyz file could not be converted to a coord file: {e}") - with open(temp_path / "coord", encoding="utf8") as f: - content = f.read() - print(content) - # write the input file inputname = "control" tm_input = self._gen_input(molecule) @@ -166,41 +133,41 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: and the return code """ try: - sp.run( + tm_out = sp.run( arguments, cwd=temp_path, capture_output=True, check=True, shell=True, ) + + # get the output of the turbomole calculation (of both stdout and stderr) if "PARNODES=1 ridft > ridft.out" in arguments[0]: with open(temp_path / "ridft.out", encoding="utf-8") as file: - ridft_file = file.read() - turbomole_log_out = ridft_file - turbomole_log_err = "" + tm_log_out = file.read() + tm_log_err = tm_out.stderr.decode("utf8", errors="replace") else: # Read the job-last file to get the output of the calculation output_file = temp_path / "job.last" if output_file.exists(): with open(output_file, encoding="utf-8") as file: - file_content = file.read() - turbomole_log_out = file_content - turbomole_log_err = "" + tm_log_out = file.read() + tm_log_err = tm_out.stderr.decode("utf8", errors="replace") else: raise FileNotFoundError(f"Output file {output_file} not found.") - if "ridft : all done" not in turbomole_log_out: + if "ridft : all done" not in tm_log_out: raise sp.CalledProcessError( 1, str(output_file), - turbomole_log_out.encode("utf8"), - turbomole_log_err.encode("utf8"), + tm_log_out.encode("utf8"), + tm_log_err.encode("utf8"), ) - return turbomole_log_out, turbomole_log_err, 0 + return tm_log_out, tm_log_err, 0 except sp.CalledProcessError as e: - turbomole_log_out = e.stdout.decode("utf8", errors="replace") - turbomole_log_err = e.stderr.decode("utf8", errors="replace") - return turbomole_log_out, turbomole_log_err, e.returncode + tm_log_out = e.stdout.decode("utf8", errors="replace") + tm_log_err = e.stderr.decode("utf8", errors="replace") + return tm_log_out, tm_log_err, e.returncode def _gen_input( self, @@ -231,10 +198,17 @@ def _gen_input( # and remove the boiler plate code to make it more general. def get_turbomole_path(binary_name: str | Path | None = None) -> Path: """ - Get the path to the turbomole binary based on different possible names - that are searched for in the PATH. + Retrieve the path to the Turbomole 'ridft' binary. + + This function searches for the 'ridft' script in the system PATH and returns + its absolute path. It is assumed that other Turbomole binaries, such as 'jobex', + are located in the same directory as 'ridft'. The path to 'ridft' will be used + as the reference point for accessing other Turbomole binaries. + + Returns: + Path: The absolute path to the 'ridft' binary. """ - default_turbomole_names: list[str | Path] = ["ridft", "jobex"] + default_turbomole_names: list[str | Path] = ["ridft"] # put binary name at the beginning of the lixt to prioritize it if binary_name is not None: binary_names = [binary_name] + default_turbomole_names From cb92f2800f5f8d5190986f3925898c64586ac78f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Thu, 16 Jan 2025 11:26:43 +0100 Subject: [PATCH 07/14] Implementation of the requested changes and the test of the read_mol_from_coord function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- mindlessgen.toml | 6 +- src/mindlessgen/generator/main.py | 39 ++++++--- src/mindlessgen/molecules/molecule.py | 37 ++++----- src/mindlessgen/prog/config.py | 37 ++++++--- src/mindlessgen/qm/__init__.py | 5 +- src/mindlessgen/qm/tm.py | 112 ++++++++++++++++++-------- test/test_molecules/test_molecule.py | 50 ++++++++++++ 7 files changed, 206 insertions(+), 80 deletions(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 4b6c9c4..0e5778f 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -90,8 +90,10 @@ gridsize = 1 scf_cycles = 100 [turbomole] -# > Path to the turbomole executable. The names `ridft` and `jobex` are automatically searched for. Options: -turbomole_path = "/path/to/turbomole" +# > Path to the ridft executable. The name `ridft` is automatically searched for. Options: +ridft_path = "/path/to/ridft" +# > Path to the jobex executable. The name `jobex` is automatically searched for. Options: +jobex_path = "/path/to/jobex" # > Functional/Method: Options: functional = "PBE" # > Basis set: Options: diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index 4928f94..670393f 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -17,8 +17,10 @@ ORCA, get_orca_path, Turbomole, - get_turbomole_path, + get_ridft_path, + get_jobex_path, GXTB, + get_gxtb_path, ) from ..molecules import iterative_optimization, postprocess_mol from ..prog import ConfigManager @@ -55,7 +57,6 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: config, get_xtb_path, get_orca_path, # g-xTB cannot be used anyway - get_turbomole_path, ) if config.general.postprocess: @@ -64,7 +65,9 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: config, get_xtb_path, get_orca_path, - get_turbomole_path, + get_ridft_path, + get_jobex_path, + get_gxtb_path, ) else: postprocess_engine = None @@ -280,7 +283,8 @@ def setup_engines( cfg: ConfigManager, xtb_path_func: Callable, orca_path_func: Callable, - turbomole_path_func: Callable, + ridft_path_func: Callable | None = None, + jobex_path_func: Callable | None = None, gxtb_path_func: Callable | None = None, ): """ @@ -303,13 +307,26 @@ def setup_engines( raise ImportError("orca not found.") from e return ORCA(path, cfg.orca) elif engine_type == "turbomole": - try: - path = turbomole_path_func(cfg.turbomole.turbomole_path) - if not path: - raise ImportError("turbomole not found.") - except ImportError as e: - raise ImportError("turbomole not found.") from e - return Turbomole(path, cfg.turbomole) + if ridft_path_func is None or jobex_path_func is None: + raise ImportError( + "No callable functions for determining the ridft and jobex paths." + ) + if cfg.postprocess.optimize: + try: + path = jobex_path_func(cfg.turbomole.jobex_path) + if not path: + raise ImportError("jobex not found.") + except ImportError as e: + raise ImportError("jobex not found.") from e + return Turbomole(path, cfg.turbomole) + else: + try: + path = ridft_path_func(cfg.turbomole.ridft_path) + if not path: + raise ImportError("turbomole not found.") + except ImportError as e: + raise ImportError("turbomole not found.") from e + return Turbomole(path, cfg.turbomole) elif engine_type == "gxtb": if gxtb_path_func is None: raise ImportError("No callable function for determining the g-xTB path.") diff --git a/src/mindlessgen/molecules/molecule.py b/src/mindlessgen/molecules/molecule.py index a3187c6..7b23d57 100644 --- a/src/mindlessgen/molecules/molecule.py +++ b/src/mindlessgen/molecules/molecule.py @@ -294,16 +294,15 @@ def read_mol_from_coord(file: str | Path) -> Molecule: else: raise TypeError("String or Path expected.") molecule.read_xyz_from_coord(file_path) - uhf_path = file_path.parent / ".UHF" - if uhf_path.exists(): - molecule.read_uhf_from_file(uhf_path) - else: - molecule.uhf = 0 - chrg_path = file_path.parent / ".CHRG" - if chrg_path.exists(): - molecule.read_charge_from_file(chrg_path) + if file_path.with_suffix(".CHRG").exists(): + molecule.read_charge_from_file(file_path.with_suffix(".CHRG")) else: molecule.charge = 0 + if file_path.with_suffix(".UHF").exists(): + molecule.read_uhf_from_file(file_path.with_suffix(".UHF")) + else: + molecule.uhf = 0 + molecule.name = file_path.stem return molecule @property @@ -607,10 +606,10 @@ def get_coord_str(self) -> str: coord_str = "$coord\n" for i in range(self.num_atoms): coord_str += ( - f"{self.xyz[i, 0] / BOHR2AA:>20.14f} " - + f"{self.xyz[i, 1] / BOHR2AA:>20.14f} " - + f"{self.xyz[i, 2] / BOHR2AA:>20.14f} " - + f"{PSE[self.ati[i] + 1]}\n" + f"{self.xyz[i, 0] / BOHR2AA:>12.7f} " + + f"{self.xyz[i, 1] / BOHR2AA:>12.7f} " + + f"{self.xyz[i, 2] / BOHR2AA:>12.7f} " + + f"{PSE[self.ati[i] + 1]:>5}\n" ) coord_str += "$end\n" return coord_str @@ -648,13 +647,11 @@ def write_coord_to_file(self, filename: str | Path | None = None): f.write(self.get_coord_str()) # if the charge is set, write it to a '.CHRG' file if self._charge is not None and self._charge != 0: - chrg_filename = filename.parent / ".CHRG" - with open(chrg_filename, "w", encoding="utf8") as f: + with open(filename.with_suffix(".CHRG"), "w", encoding="utf8") as f: f.write(f"{self.charge}\n") # if the UHF is set, write it to a '.UHF' file if self._uhf is not None and self._uhf > 0: - uhf_filename = filename.parent / ".UHF" - with open(uhf_filename, "w", encoding="utf8") as f: + with open(filename.with_suffix(".UHF"), "w", encoding="utf8") as f: f.write(f"{self.uhf}\n") def read_xyz_from_coord(self, filename: str | Path) -> None: @@ -666,12 +663,6 @@ def read_xyz_from_coord(self, filename: str | Path) -> None: $coord 0.00000000000000 0.00000000000000 3.60590687077610 u 0.00000000000000 0.00000000000000 -3.60590687077610 u - 0.00000000000000 2.74684941070244 0.00000000000000 F - 3.72662552762076 0.00000000000000 5.36334486193405 F - -3.72662552762076 0.00000000000000 5.36334486193405 F - 3.72662552762076 0.00000000000000 -5.36334486193405 F - 0.00000000000000 -2.74684941070244 0.00000000000000 F - -3.72662552762076 0.00000000000000 -5.36334486193405 F $end ... or ... $coord @@ -693,6 +684,8 @@ def read_xyz_from_coord(self, filename: str | Path) -> None: """ with open(filename, encoding="utf8") as f: lines = f.readlines() + if lines[0] != "$coord\n": + raise ValueError("File does not start with '$coord'.") # number of atoms num_atoms = 0 for line in lines: diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 468aa37..67a3f84 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -931,7 +931,8 @@ class TURBOMOLEConfig(BaseConfig): """ def __init__(self: TURBOMOLEConfig) -> None: - self._turbomole_path: str | Path = "turbomole" + self._ridft_path: str | Path = "ridft" + self._jobex_path: str | Path = "jobex" self._functional: str = "pbe" self._basis: str = "def2-SVP" self._scf_cycles: int = 100 @@ -940,20 +941,36 @@ def get_identifier(self) -> str: return "turbomole" @property - def turbomole_path(self): + def ridft_path(self): """ - Get the turbomole path. + Get the ridft path. """ - return self._turbomole_path + return self._ridft_path - @turbomole_path.setter - def turbomole_path(self, turbomole_path: str | Path): + @ridft_path.setter + def ridft_path(self, ridft_path: str | Path): """ - Set the turbomole path. + Set the ridft path. """ - if not isinstance(turbomole_path, str | Path): - raise TypeError("turbomole_path should be a string or Path.") - self._turbomole_path = turbomole_path + if not isinstance(ridft_path, str | Path): + raise TypeError("ridft_path should be a string or Path.") + self._ridft_path = ridft_path + + @property + def jobex_path(self): + """ + Get the jobex path. + """ + return self._jobex_path + + @jobex_path.setter + def jobex_path(self, jobex_path: str | Path): + """ + Set the jobex path. + """ + if not isinstance(jobex_path, str | Path): + raise TypeError("jobex_path should be a string or Path.") + self._jobex_path = jobex_path @property def functional(self): diff --git a/src/mindlessgen/qm/__init__.py b/src/mindlessgen/qm/__init__.py index 07b8873..f16adcc 100644 --- a/src/mindlessgen/qm/__init__.py +++ b/src/mindlessgen/qm/__init__.py @@ -5,7 +5,7 @@ from .base import QMMethod from .xtb import XTB, get_xtb_path from .orca import ORCA, get_orca_path -from .tm import Turbomole, get_turbomole_path +from .tm import Turbomole, get_ridft_path, get_jobex_path from .gxtb import GXTB, get_gxtb_path __all__ = [ @@ -15,7 +15,8 @@ "ORCA", "get_orca_path", "Turbomole", - "get_turbomole_path", + "get_ridft_path", + "get_jobex_path", "GXTB", "get_gxtb_path", ] diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index eee939d..1b9cf8c 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -36,9 +36,8 @@ def optimize( verbosity: int = 1, ) -> Molecule: """ - Optimize a molecule using ORCA. + Optimize a molecule using Turbomole. """ - # Create a unique temporary directory using TemporaryDirectory context manager with TemporaryDirectory(prefix="turbomole_") as temp_dir: temp_path = Path(temp_dir).resolve() @@ -56,11 +55,11 @@ def optimize( f.write(tm_input) # Setup the turbomole optimization command including the max number of optimization cycles - arguments = [f"PARNODES=1 jobex -ri -c {max_cycles}"] + arguments = [f"PARNODES=1 {self.path} -ri -c {max_cycles}"] if verbosity > 2: print(f"Running command: {' '.join(arguments)}") - tm_log_out, tm_log_err, return_code = self._run( + tm_log_out, tm_log_err, return_code = self._run_opt( temp_path=temp_path, arguments=arguments ) if verbosity > 2: @@ -71,9 +70,9 @@ def optimize( ) # # revert the coord file to xyz file - xyzfile = Path(temp_path / "coord").resolve() + coordfile = Path(temp_path / "coord").resolve() optimized_molecule = molecule.copy() - optimized_molecule.read_xyz_from_coord(xyzfile) + optimized_molecule.read_xyz_from_coord(coordfile) return optimized_molecule def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: @@ -86,7 +85,6 @@ def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: # write the molecule to a temporary file molfile = "coord" molecule.write_coord_to_file(temp_path / molfile) - print(temp_path / molfile) # write the input file inputname = "control" @@ -99,7 +97,9 @@ def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: f.write(tm_input) # set up the turbomole single point calculation command - run_tm = ["PARNODES=1 ridft > ridft.out"] + run_tm = [f"PARNODES=1 {self.path} -ri"] + if verbosity > 2: + print(f"Running command: {' '.join(run_tm)}") tm_log_out, tm_log_err, return_code = self._run( temp_path=temp_path, arguments=run_tm @@ -138,28 +138,57 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: cwd=temp_path, capture_output=True, check=True, - shell=True, + shell=True, # shell=True is necessary for inserting the `PARNODES=xx` command, unfortunately. ) - # get the output of the turbomole calculation (of both stdout and stderr) - if "PARNODES=1 ridft > ridft.out" in arguments[0]: - with open(temp_path / "ridft.out", encoding="utf-8") as file: + tm_log_out = tm_out.stdout.decode("utf8", errors="replace") + tm_log_err = tm_out.stderr.decode("utf8", errors="replace") + + if "ridft : all done" not in tm_log_out: + raise sp.CalledProcessError( + 1, + str(self.path), + tm_log_out.encode("utf8"), + tm_log_err.encode("utf8"), + ) + return tm_log_out, tm_log_err, 0 + except sp.CalledProcessError as e: + tm_log_out = e.stdout.decode("utf8", errors="replace") + tm_log_err = e.stderr.decode("utf8", errors="replace") + return tm_log_out, tm_log_err, e.returncode + + def _run_opt(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: + """ + Run turbomole optimization with the given arguments. + + Arguments: + arguments (list[str]): The arguments to pass to turbomole. + + Returns: + tuple[str, str, int]: The output of the turbomole calculation (stdout and stderr) + and the return code + """ + try: + tm_out = sp.run( + arguments, + cwd=temp_path, + capture_output=True, + check=True, + shell=True, # shell=True is necessary for inserting the `PARNODES=xx` command, unfortunately. + ) + # Read the job-last file to get the output of the calculation + output_file = temp_path / "job.last" + if output_file.exists(): + with open(output_file, encoding="utf-8") as file: tm_log_out = file.read() tm_log_err = tm_out.stderr.decode("utf8", errors="replace") else: - # Read the job-last file to get the output of the calculation - output_file = temp_path / "job.last" - if output_file.exists(): - with open(output_file, encoding="utf-8") as file: - tm_log_out = file.read() - tm_log_err = tm_out.stderr.decode("utf8", errors="replace") - else: - raise FileNotFoundError(f"Output file {output_file} not found.") + raise FileNotFoundError(f"Output file {output_file} not found.") if "ridft : all done" not in tm_log_out: raise sp.CalledProcessError( 1, - str(output_file), + str(self.path), tm_log_out.encode("utf8"), tm_log_err.encode("utf8"), ) @@ -196,28 +225,45 @@ def _gen_input( # 3. Add the renamed method to the ABC `QMMethod` # 4. In `main.py`: Remove the passing of the path finder functions as arguments # and remove the boiler plate code to make it more general. -def get_turbomole_path(binary_name: str | Path | None = None) -> Path: +def get_ridft_path(binary_name: str | Path | None = None) -> Path: """ Retrieve the path to the Turbomole 'ridft' binary. - This function searches for the 'ridft' script in the system PATH and returns - its absolute path. It is assumed that other Turbomole binaries, such as 'jobex', - are located in the same directory as 'ridft'. The path to 'ridft' will be used - as the reference point for accessing other Turbomole binaries. - Returns: Path: The absolute path to the 'ridft' binary. """ - default_turbomole_names: list[str | Path] = ["ridft"] - # put binary name at the beginning of the lixt to prioritize it + default_ridft_names: list[str | Path] = ["ridft"] + # put binary name at the beginning of the list to prioritize it if binary_name is not None: - binary_names = [binary_name] + default_turbomole_names + binary_names = [binary_name] + default_ridft_names else: - binary_names = default_turbomole_names + binary_names = default_ridft_names # Get turbomole path from 'which ridft' command for binpath in binary_names: which_ridft = shutil.which(binpath) if which_ridft: - ridft_path = Path(which_ridft).resolve() + ridft_path = Path(which_ridft) return ridft_path - raise ImportError("'turbomole' binary could not be found.") + raise ImportError("'ridft' (TURBOMOLE DFT) binary could not be found.") + + +def get_jobex_path(binary_name: str | Path | None = None) -> Path: + """ + Retrieve the path to the Turbomole 'jobex' binary. + + Returns: + Path: The absolute path to the 'jobex' binary. + """ + default_jobex_names: list[str | Path] = ["jobex"] + # put binary name at the beginning of the list to prioritize it + if binary_name is not None: + binary_names = [binary_name] + default_jobex_names + else: + binary_names = default_jobex_names + # Get turbomole path from 'which jobex' command + for binpath in binary_names: + which_jobex = shutil.which(binpath) + if which_jobex: + jobex_path = Path(which_jobex).resolve() + return jobex_path + raise ImportError("'jobex' (TURBOMOLE) binary could not be found.") diff --git a/test/test_molecules/test_molecule.py b/test/test_molecules/test_molecule.py index 2bd1a4d..c4b2dda 100644 --- a/test/test_molecules/test_molecule.py +++ b/test/test_molecules/test_molecule.py @@ -198,6 +198,56 @@ def test_read_mol_from_file(tmp_path, filename, num_atoms, charge, expected_exce ) +@pytest.mark.parametrize( + "filename, num_atoms, charge, expected_exception", + [ + ("test_molecule", 2, 1, None), # Valid file and data + ("invalid_molecule", None, None, FileNotFoundError), # Missing file + ("corrupted_molecule", None, None, ValueError), # Invalid data + ], +) +def test_read_xyz_from_coord(tmp_path, filename, num_atoms, charge, expected_exception): + if filename == "test_molecule": + # Create valid XYZ and CHRG files for testing + coord_content = "$coord\n0.0 0.0 0.0 H\n1.8897261259077822 0.0 0.0 O\n$end\n" + chrg_content = "1\n" + elif filename == "corrupted_molecule": + # Create an invalid XYZ file + coord_content = "Invalid content\n" + chrg_content = "invalid\n" + else: + # Simulate missing file by skipping file creation + coord_content = None + chrg_content = None + + if coord_content and chrg_content: + coord_file = tmp_path / f"{filename}" + chrg_file = tmp_path / f"{filename}.CHRG" + coord_file.write_text(coord_content) + chrg_file.write_text(chrg_content) + + if filename == "corrupted_molecule": + print(f"expected_exception: {expected_exception}") + with pytest.raises(expected_exception): + Molecule.read_mol_from_coord(str(tmp_path / coord_file)) + elif filename == "invalid_molecule": + print(f"expected_exception: {expected_exception}") + with pytest.raises(expected_exception): + # load from "filename", which does not exist -> FileNotFoundError + Molecule.read_mol_from_coord(str(tmp_path / filename)) + else: + mol = Molecule.read_mol_from_coord(str(tmp_path / coord_file)) + assert mol.num_atoms == num_atoms + assert mol.charge == charge + assert mol.uhf == 0 + np.testing.assert_array_equal(mol.ati, np.array([0, 7])) + assert mol.atlist[0] == 1 + assert mol.atlist[7] == 1 + np.testing.assert_array_equal( + mol.xyz, np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]) + ) + + def test_copy_method(): mol = Molecule(name="original") mol.num_atoms = 2 From 6cb0321ed405c7ea6d409243977e2030a910308c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Thu, 16 Jan 2025 14:00:36 +0100 Subject: [PATCH 08/14] Implementation of two pathes for turbomole MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- mindlessgen.toml | 2 +- src/mindlessgen/generator/main.py | 39 ++++++++++++++----------------- src/mindlessgen/prog/config.py | 3 --- src/mindlessgen/qm/tm.py | 34 ++++++++++++++++++--------- 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/mindlessgen.toml b/mindlessgen.toml index 0e5778f..722aa5f 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -51,7 +51,7 @@ molecular_charge = "none" [refine] # > Maximum number of fragment optimization cycles. Options: max_frag_cycles = 10 -# > Quantum Mechanics (QM) engine to use. Options: 'xtb', 'orca' +# > Quantum Mechanics (QM) engine to use. Options: 'xtb', 'orca'. 'turbomole' engine = "xtb" # > HOMO-LUMO gap threshold applied at the end of the refinement step hlgap = 0.5 diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index 670393f..1854d38 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -57,6 +57,8 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]: config, get_xtb_path, get_orca_path, # g-xTB cannot be used anyway + get_ridft_path, + get_jobex_path, ) if config.general.postprocess: @@ -283,8 +285,8 @@ def setup_engines( cfg: ConfigManager, xtb_path_func: Callable, orca_path_func: Callable, - ridft_path_func: Callable | None = None, - jobex_path_func: Callable | None = None, + ridft_path_func: Callable, + jobex_path_func: Callable, gxtb_path_func: Callable | None = None, ): """ @@ -307,26 +309,19 @@ def setup_engines( raise ImportError("orca not found.") from e return ORCA(path, cfg.orca) elif engine_type == "turbomole": - if ridft_path_func is None or jobex_path_func is None: - raise ImportError( - "No callable functions for determining the ridft and jobex paths." - ) - if cfg.postprocess.optimize: - try: - path = jobex_path_func(cfg.turbomole.jobex_path) - if not path: - raise ImportError("jobex not found.") - except ImportError as e: - raise ImportError("jobex not found.") from e - return Turbomole(path, cfg.turbomole) - else: - try: - path = ridft_path_func(cfg.turbomole.ridft_path) - if not path: - raise ImportError("turbomole not found.") - except ImportError as e: - raise ImportError("turbomole not found.") from e - return Turbomole(path, cfg.turbomole) + try: + jobex_path = jobex_path_func(cfg.turbomole.jobex_path) + if not jobex_path: + raise ImportError("jobex not found.") + except ImportError as e: + raise ImportError("jobex not found.") from e + try: + ridft_path = ridft_path_func(cfg.turbomole.ridft_path) + if not ridft_path: + raise ImportError("ridft not found.") + except ImportError as e: + raise ImportError("ridft not found.") from e + return Turbomole(jobex_path, ridft_path, cfg.turbomole) elif engine_type == "gxtb": if gxtb_path_func is None: raise ImportError("No callable function for determining the g-xTB path.") diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 67a3f84..37af6c8 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -1173,9 +1173,6 @@ def load_from_toml(self, config_file: str | Path) -> None: [orca] orca_option = "opt" - [turbomole] - turbomole_option = "opt" - Arguments: config_file (str): Path to the configuration file diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index 1b9cf8c..a7029c9 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -17,16 +17,28 @@ class Turbomole(QMMethod): This class handles all interaction with the turbomole external dependency. """ - def __init__(self, path: str | Path, turbomolecfg: TURBOMOLEConfig) -> None: + def __init__( + self, + jobex_path: str | Path, + ridft_path: str | Path, + turbomolecfg: TURBOMOLEConfig, + ) -> None: """ Initialize the turbomole class. """ - if isinstance(path, str): - self.path: Path = Path(path).resolve() - elif isinstance(path, Path): - self.path = path + if isinstance(jobex_path, str): + self.jobex_path: Path = Path(jobex_path).resolve() + elif isinstance(jobex_path, Path): + self.jobex_path = jobex_path + else: + raise TypeError("jobex_path should be a string or a Path object.") + + if isinstance(ridft_path, str): + self.ridft_path: Path = Path(ridft_path).resolve() + elif isinstance(ridft_path, Path): + self.ridft_path = ridft_path else: - raise TypeError("turbomole_path should be a string or a Path object.") + raise TypeError("ridft_path should be a string or a Path object.") self.cfg = turbomolecfg def optimize( @@ -55,7 +67,7 @@ def optimize( f.write(tm_input) # Setup the turbomole optimization command including the max number of optimization cycles - arguments = [f"PARNODES=1 {self.path} -ri -c {max_cycles}"] + arguments = [f"PARNODES=1 {self.jobex_path} -c {max_cycles}"] if verbosity > 2: print(f"Running command: {' '.join(arguments)}") @@ -97,7 +109,7 @@ def singlepoint(self, molecule: Molecule, verbosity: int = 1) -> str: f.write(tm_input) # set up the turbomole single point calculation command - run_tm = [f"PARNODES=1 {self.path} -ri"] + run_tm = [f"PARNODES=1 {self.ridft_path}"] if verbosity > 2: print(f"Running command: {' '.join(run_tm)}") @@ -147,7 +159,7 @@ def _run(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int]: if "ridft : all done" not in tm_log_out: raise sp.CalledProcessError( 1, - str(self.path), + str(self.ridft_path), tm_log_out.encode("utf8"), tm_log_err.encode("utf8"), ) @@ -188,8 +200,8 @@ def _run_opt(self, temp_path: Path, arguments: list[str]) -> tuple[str, str, int if "ridft : all done" not in tm_log_out: raise sp.CalledProcessError( 1, - str(self.path), - tm_log_out.encode("utf8"), + str(self.jobex_path), + tm_log_out, tm_log_err.encode("utf8"), ) return tm_log_out, tm_log_err, 0 From 95458b84ffd42b44c4b28a87473890229b1b8a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Thu, 16 Jan 2025 17:04:28 +0100 Subject: [PATCH 09/14] Ignore HOMO-LUMO gap not implemented as refine engine for Orca and Trubomole MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- src/mindlessgen/molecules/refinement.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mindlessgen/molecules/refinement.py b/src/mindlessgen/molecules/refinement.py index a520ebd..6bd415a 100644 --- a/src/mindlessgen/molecules/refinement.py +++ b/src/mindlessgen/molecules/refinement.py @@ -3,6 +3,7 @@ to obtain finally a valid molecule. """ +import warnings from pathlib import Path import networkx as nx # type: ignore import numpy as np @@ -151,6 +152,8 @@ def iterative_optimization( gap_sufficient = engine.check_gap( molecule=rev_mol, threshold=config_refine.hlgap, verbosity=verbosity ) + except NotImplementedError: + warnings.warn("HOMO-LUMO gap check not implemented with this engine.") except RuntimeError as e: raise RuntimeError("HOMO-LUMO gap could not be checked.") from e if not gap_sufficient: From b57bc93bed18a46166ad9d31ec819e32eafe03a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Thu, 16 Jan 2025 19:02:24 +0100 Subject: [PATCH 10/14] improve TURBOMOLE configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- src/mindlessgen/qm/tm.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index a7029c9..448567f 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -219,13 +219,18 @@ def _gen_input( """ tm_input = "$coord file=coord\n" tm_input += f"$charge={molecule.charge} unpaired={molecule.uhf}\n" - tm_input += "$symmetry c1\n" tm_input += "$atoms\n" tm_input += f" basis={self.cfg.basis}\n" tm_input += "$dft\n" tm_input += f" functional {self.cfg.functional}\n" + tm_input += f" gridsize m4\n" tm_input += "$rij\n" + if self.cfg.functional.endswith("-v"): + tm_input += "$doscnl\n" + else: + tm_input += "$disp4\n" tm_input += f"$scfiterlimit {self.cfg.scf_cycles}\n" + tm_input += f"$scfconv 7\n" tm_input += "$energy file=energy\n" tm_input += "$grad file=gradient\n" tm_input += "$end" From 970faf9434dfe77811fdfe77e6f53b144506e9a9 Mon Sep 17 00:00:00 2001 From: Marcel Mueller Date: Thu, 16 Jan 2025 19:06:14 +0100 Subject: [PATCH 11/14] correct linting errors Signed-off-by: Marcel Mueller --- src/mindlessgen/qm/tm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mindlessgen/qm/tm.py b/src/mindlessgen/qm/tm.py index 448567f..c75806e 100644 --- a/src/mindlessgen/qm/tm.py +++ b/src/mindlessgen/qm/tm.py @@ -223,14 +223,14 @@ def _gen_input( tm_input += f" basis={self.cfg.basis}\n" tm_input += "$dft\n" tm_input += f" functional {self.cfg.functional}\n" - tm_input += f" gridsize m4\n" + tm_input += " gridsize m4\n" tm_input += "$rij\n" if self.cfg.functional.endswith("-v"): tm_input += "$doscnl\n" else: tm_input += "$disp4\n" tm_input += f"$scfiterlimit {self.cfg.scf_cycles}\n" - tm_input += f"$scfconv 7\n" + tm_input += "$scfconv 7\n" tm_input += "$energy file=energy\n" tm_input += "$grad file=gradient\n" tm_input += "$end" From c71bdfa6ba66e5aadcb5e8c841d3f726e56c9439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Fri, 17 Jan 2025 09:34:49 +0100 Subject: [PATCH 12/14] exclude more recent interfaces from coverage check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9e28d00..fc0b69d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,12 @@ markers = "optional: mark test as optional" plugins = ["covdefaults"] source = ["./src"] # Exclude interfaces to external programs from coverage -omit = ["src/mindlessgen/qm/xtb.py", "src/mindlessgen/qm/orca.py"] +omit = [ + "src/mindlessgen/qm/xtb.py", + "src/mindlessgen/qm/orca.py", + "src/mindlessgen/qm/tm.py", + "src/mindlessgen/qm/gxtb.py", +] [tool.coverage.report] fail_under = 50 From b1c5cc20edbd438da6b57878d39f35c82d08347a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Fri, 17 Jan 2025 10:48:23 +0100 Subject: [PATCH 13/14] added a test for the 'write_coord_to_file' function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- test/test_molecules/test_molecule.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/test_molecules/test_molecule.py b/test/test_molecules/test_molecule.py index c4b2dda..f3ff5df 100644 --- a/test/test_molecules/test_molecule.py +++ b/test/test_molecules/test_molecule.py @@ -248,6 +248,32 @@ def test_read_xyz_from_coord(tmp_path, filename, num_atoms, charge, expected_exc ) +def test_write_coord_to_file(tmp_path): + mol = Molecule(name="test") + mol.num_atoms = 2 + mol.ati = np.array([0, 8], dtype=int) + mol.xyz = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]) + + expected_content = """$coord +0.0000000 0.0000000 0.0000000 H +1.8897261 0.0000000 0.0000000 F +$end +""" + + mol.write_coord_to_file(str(tmp_path / "test.coord")) + + assert (tmp_path / "test.coord").exists() + + # Lese den Inhalt der Datei und entferne überflüssige Leerzeichen für den Vergleich + actual_content = (tmp_path / "test.coord").read_text().strip() + normalized_actual = " ".join(actual_content.split()) + normalized_expected = " ".join(expected_content.split()) + + assert ( + normalized_actual == normalized_expected + ), f"Actual content:\n{actual_content}\nExpected content:\n{expected_content}" + + def test_copy_method(): mol = Molecule(name="original") mol.num_atoms = 2 From 6f27091a03f30e219b7bdd97dc1dd101c2aeea19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Sch=C3=B6ps?= Date: Fri, 17 Jan 2025 10:52:48 +0100 Subject: [PATCH 14/14] sync the lokal branch with the remote branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Schöps --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9e28d00..fc0b69d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,12 @@ markers = "optional: mark test as optional" plugins = ["covdefaults"] source = ["./src"] # Exclude interfaces to external programs from coverage -omit = ["src/mindlessgen/qm/xtb.py", "src/mindlessgen/qm/orca.py"] +omit = [ + "src/mindlessgen/qm/xtb.py", + "src/mindlessgen/qm/orca.py", + "src/mindlessgen/qm/tm.py", + "src/mindlessgen/qm/gxtb.py", +] [tool.coverage.report] fail_under = 50