diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 738bf25b..c545e4f8 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -25,6 +25,7 @@ jobs: python-version: "3.10" - name: install dependencies run: | + sudo apt-get install -y cp2k python -m pip install tox - name: build documentation run: tox -e docs diff --git a/.gitignore b/.gitignore index 80d29ad2..f6eb9338 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ docs/src/examples/ *build* *egg-info/ +sg_execution_times.rst diff --git a/docs/src/conf.py b/docs/src/conf.py index dd941401..507b5601 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -1,6 +1,7 @@ # Add any Sphinx extension module names here, as strings. extensions = [ "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", "sphinx_gallery.load_style", ] @@ -12,3 +13,14 @@ htmlhelp_basename = "COSMO software-cookbook" html_theme = "furo" + + +intersphinx_mapping = { + "ase": ("https://wiki.fysik.dtu.dk/ase/", None), + "metatensor": ("https://lab-cosmo.github.io/metatensor/latest/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), + "python": ("https://docs.python.org/3", None), + "rascaline": ("https://luthaf.fr/rascaline/latest/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), +} diff --git a/docs/src/index.rst b/docs/src/index.rst index c0690894..6f0c01b4 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -13,3 +13,4 @@ COSMO Software Cookbook examples/lode_linear/lode_tutorial examples/sample_selection/sample_selection examples/gaas_map/gaas_map + examples/cp2k_run_batch/setup_reftraj \ No newline at end of file diff --git a/examples/cp2k_run_batch/.gitignore b/examples/cp2k_run_batch/.gitignore new file mode 100644 index 00000000..9fc7d688 --- /dev/null +++ b/examples/cp2k_run_batch/.gitignore @@ -0,0 +1,5 @@ +production/ + +cp2k.out +cp2k.inp +cp2k_shell.ssmp diff --git a/examples/cp2k_run_batch/README.rst b/examples/cp2k_run_batch/README.rst new file mode 100644 index 00000000..bc28d4b6 --- /dev/null +++ b/examples/cp2k_run_batch/README.rst @@ -0,0 +1,7 @@ +CP2K bash computation +============================ + +This is an example of a batch calculation using CP2K. +The inputs are a set of structures in `example.xyz` +using the parameters defined in `./data/reftraj_template.cp2k` +importing basisset and pseudopotentials from the local CP2K install. diff --git a/examples/cp2k_run_batch/environement.yml b/examples/cp2k_run_batch/environement.yml new file mode 100644 index 00000000..b52c25ff --- /dev/null +++ b/examples/cp2k_run_batch/environement.yml @@ -0,0 +1,6 @@ +name: cp2k_run_batch +dependencies: + - python=3.11 + - pip + - pip: + - ase diff --git a/examples/cp2k_run_batch/example.xyz b/examples/cp2k_run_batch/example.xyz new file mode 100644 index 00000000..c0031e2d --- /dev/null +++ b/examples/cp2k_run_batch/example.xyz @@ -0,0 +1,8 @@ +6 +Lattice="3.9111732301689224 0.0 0.0 0.0 3.9111732301689224 0.0 0.0 0.0 3.9111732301689224" Properties=species:S:1:pos:R:3 #=T CELL(abcABC):=T 3.91117=T 90.00000=T Step:=T 1975600=T Bead:=T 0=T x_centroidangstrom=T cellangstrom=T count=0 pbc="T T T" +H 2.55799292 0.30476323 0.67883354 +H 1.46739292 0.53584100 3.59015677 +O 1.78619292 3.82685646 0.31171354 +H 3.52058031 2.55936677 2.71091323 +H 0.84863385 2.05009677 1.85452646 +O 0.09813385 1.75660677 2.33837646 diff --git a/examples/cp2k_run_batch/reftraj_template.cp2k b/examples/cp2k_run_batch/reftraj_template.cp2k new file mode 100644 index 00000000..8792d232 --- /dev/null +++ b/examples/cp2k_run_batch/reftraj_template.cp2k @@ -0,0 +1,144 @@ +@SET PREP 0 + +@SET SCF_GUESS RESTART +@SET SCREEN_ON_INITIAL_P TRUE + +@IF ${PREP} + @SET SCF_GUESS ATOMIC + @SET SCREEN_ON_INITIAL_P FALSE +@ENDIF + +&GLOBAL + PROJECT //PROJECT// + PREFERRED_FFT_LIBRARY FFTW + FFTW_PLAN_TYPE MEASURE + RUN_TYPE MD + PRINT_LEVEL LOW +&END GLOBAL + +&MOTION + &PRINT + &CELL + &EACH + MD 1 + &END EACH + &END CELL + &FORCES + &EACH + MD 1 + &END EACH + &END FORCES + &END PRINT + &MD + ENSEMBLE REFTRAJ + &REFTRAJ ! Loads an external trajectory file and performs analysis on the loaded snapshots. + EVAL_ENERGY_FORCES TRUE + EVAL_FORCES TRUE + CELL_FILE_NAME reftraj.cell + TRAJ_FILE_NAME reftraj.xyz + FIRST_SNAPSHOT 1 + VARIABLE_VOLUME TRUE + LAST_SNAPSHOT //LAST_SNAPSHOT// + &END REFTRAJ + &END MD +&END MOTION + +&FORCE_EVAL + &PRINT + &FORCES + &EACH + MD 1 + &END EACH + &END FORCES + &END PRINT + &DFT + BASIS_SET_FILE_NAME GTH_BASIS_SETS + BASIS_SET_FILE_NAME BASIS_ADMM + POTENTIAL_FILE_NAME POTENTIAL + &MGRID + CUTOFF 400 + &END MGRID + &SCF + SCF_GUESS ${SCF_GUESS} + MAX_SCF 20 + EPS_SCF 5.0E-7 + &OT + MINIMIZER DIIS + PRECONDITIONER FULL_ALL + &END OT + &OUTER_SCF + MAX_SCF 20 + EPS_SCF 5.0E-7 + &END OUTER_SCF + &END SCF + &QS + EPS_DEFAULT 1.0E-12 + EPS_PGF_ORB 1.0E-16 + EXTRAPOLATION_ORDER 5 + &END QS + &XC # revPBE0-TC-D3 + &XC_FUNCTIONAL + &PBE + PARAMETRIZATION REVPBE + SCALE_X 0.75 + SCALE_C 1.0 + &END + &END XC_FUNCTIONAL + &HF + FRACTION 0.25 + &SCREENING + EPS_SCHWARZ 1.0E-6 + SCREEN_ON_INITIAL_P ${SCREEN_ON_INITIAL_P} + &END + &MEMORY + MAX_MEMORY 37000 + EPS_STORAGE_SCALING 0.1 + &END + &INTERACTION_POTENTIAL + POTENTIAL_TYPE TRUNCATED + CUTOFF_RADIUS 3.0 + T_C_G_DATA t_c_g.dat + &END + &HF_INFO + &END HF_INFO + &END + &VDW_POTENTIAL + POTENTIAL_TYPE PAIR_POTENTIAL + &PAIR_POTENTIAL + TYPE DFTD3 + R_CUTOFF 15 + LONG_RANGE_CORRECTION TRUE + REFERENCE_FUNCTIONAL revPBE0 + PARAMETER_FILE_NAME dftd3.dat + &END + &END + &XC_GRID + XC_DERIV SPLINE2 + &END + &END XC + &AUXILIARY_DENSITY_MATRIX_METHOD + METHOD BASIS_PROJECTION + ADMM_PURIFICATION_METHOD MO_DIAG + &END AUXILIARY_DENSITY_MATRIX_METHOD + &END DFT + &SUBSYS + &TOPOLOGY + COORD_FILE_NAME init.xyz + COORD_FILE_FORMAT XYZ + CONN_FILE_FORMAT GENERATE + &END TOPOLOGY + &CELL + ABC [angstrom] //CELL// + &END CELL + &KIND H + BASIS_SET TZV2P-GTH + BASIS_SET AUX_FIT cpFIT3 + POTENTIAL GTH-PBE-q1 + &END KIND + &KIND O + BASIS_SET TZV2P-GTH + BASIS_SET AUX_FIT cpFIT3 + POTENTIAL GTH-PBE-q6 + &END KIND + &END SUBSYS +&END FORCE_EVAL diff --git a/examples/cp2k_run_batch/run_calcs.sh b/examples/cp2k_run_batch/run_calcs.sh new file mode 100644 index 00000000..6b58c954 --- /dev/null +++ b/examples/cp2k_run_batch/run_calcs.sh @@ -0,0 +1,8 @@ +#! /bin/bash + +for i in $(find ./production/ -mindepth 1 -type d); do + cd $i + cp2k -i in.cp2k + cd - +done + diff --git a/examples/cp2k_run_batch/setup_reftraj.py b/examples/cp2k_run_batch/setup_reftraj.py new file mode 100644 index 00000000..c5b5f979 --- /dev/null +++ b/examples/cp2k_run_batch/setup_reftraj.py @@ -0,0 +1,337 @@ +r""" +Batch run of CP2K calculations +============================== + +.. start-body + +This is an example how to perform single point calculations based on list of structures +using `CP2K `_ using its `reftraj functionality +`_. The inputs are a +set of structures in :download:`example.xyz` using the DFT parameters defined in +:download:`reftraj_template.cp2k` importing basis set and pseudopotentials from the +local CP2K installation. The reference DFT parameters are taken from `Cheng et al. Ab +initio thermodynamics of liquid and solid water 2019 +`_. Due to the small size of the test +structure and convergence issues, we have decreased the size of the ``CUTOFF_RADIUS`` +from :math:`6.0\,\mathrm{Å}` to :math:`3.0\,\mathrm{Å}`. For actual production +calculations adapt the template! + +To run this example, we use a bare executable called with ``cp2k``. If you want to use +another version you can either adjust the the names within this example or link your +binary with a different name to ``cp2k``. +""" + +# %% +# We start the example by importing the required packages. + + +import os +import re +import shutil +import subprocess +from os.path import basename, splitext +from typing import List, Union + +import ase.io +import ase.visualize.plot +import matplotlib.pyplot as plt +import numpy as np +from ase.calculators.cp2k import CP2K + + +# %% +# Define necessary functions +# -------------------------- +# Next we below define necessary helper functions to run the example. + + +def write_reftraj(fname: str, frames: Union[ase.Atoms, List[ase.Atoms]]) -> None: + """Writes a list of ase atoms objects to a reference trajectory. + + A reference trajectory is the CP2K compatible format for the compuation of batches. + All frames must have the stoichiometry/composition. + """ + + if isinstance(frames, ase.Atoms): + frames = [frames] + + out = "" + for i, atoms in enumerate(frames): + if ( + len(atoms) != len(frames[0]) + or atoms.get_chemical_formula() != frames[0].get_chemical_formula() + ): + raise ValueError( + f"Atom symbols in frame {i},{atoms.get_chemical_formula()} are " + f"different compared to inital frame " + f"{frames[0].get_chemical_formula()}. " + "CP2K does not support changing atom types within a reftraj run!" + ) + + out += f"{len(atoms):>8}\n i = {i+1:>8}, time = {0:>12.3f}\n" + for atom in atoms: + pos = atom.position + out += f"{atom.symbol}{pos[0]:24.15f}{pos[1]:24.15f}{pos[2]:24.15f}\n" + out += "\n" + with open(fname, "w") as f: + f.write(out) + + +# %% + + +def write_cellfile(fname: str, frames: Union[ase.Atoms, List[ase.Atoms]]) -> None: + """Writes a cellfile for a list of ``ase.Atoms``. + + A Cellfile accompanies a reftraj containing the cell parameters. + """ + if isinstance(frames, ase.Atoms): + frames = [frames] + + out = ( + "# " + "Step " + "Time [fs] " + "Ax [Angstrom] " + "Ay [Angstrom] " + "Az [Angstrom] " + "Bx [Angstrom] " + "By [Angstrom] " + "Bz [Angstrom] " + "Cx [Angstrom] " + "Cy [Angstrom] " + "Cz [Angstrom] " + "Volume [Angstrom^3]\n" + ) + + for i, atoms in enumerate(frames): + out += f"{i+1:>8}{0:>12.3f}" + out += "".join([f"{c:>20.10f}" for c in atoms.cell.flatten()]) + out += f"{atoms.cell.volume:>25.10f}" + out += "\n" + + with open(fname, "w") as f: + f.write(out) + + +# %% + + +def write_cp2k_in( + fname: str, project_name: str, last_snapshot: int, cell: List[float] +) -> None: + """Writes a cp2k input file from a template. + + Importantly, it writes the location of the basis set definitions, + determined from the path of the system CP2K install to the input file. + """ + + cp2k_in = open("reftraj_template.cp2k", "r").read() + + cp2k_in = cp2k_in.replace("//PROJECT//", project_name) + cp2k_in = cp2k_in.replace("//LAST_SNAPSHOT//", str(last_snapshot)) + cp2k_in = cp2k_in.replace("//CELL//", " ".join([f"{c:.6f}" for c in cell])) + + with open(fname, "w") as f: + f.write(cp2k_in) + + +# %% + + +def mkdir_force(*args, **kwargs) -> None: + """Warpper to ``os.mkdir``. + + The function does not raise an error if the directory already exists. + """ + try: + os.mkdir(*args, **kwargs) + except OSError: + pass + + +# %% +# Prepare calculation inputs +# -------------------------- +# During this example we will create a directory named ``project_directory`` containing +# the subdirectories for each stoichiometry. This is necessary, because CP2K can only +# run calculations using a fixed stoichiometry at a time, using its ``reftraj`` +# functionality. +# +# Below we define the general information for the CP2K run. This includes the reference +# files for the structures, the ``project_name`` used to build the name of the +# trajectory during the CP2K run, the ``project_directory`` where we store all +# simulation output as well as the path ``write_to_file`` which is the name of the file +# containing the computed energies and forces of the sinulation. + +frames_full = ase.io.read("example.xyz", ":") +project_name = "test_calcs" # name of the global PROJECT +project_directory = "production" +write_to_file = "out.xyz" + +# %% +# Below we show the initial configuration of two water molecules in a cubic box with a +# side length of :math:`\approx 4\,\mathrm{Å}`. + +ase.visualize.plot.plot_atoms(frames_full[0]) + +plt.xlabel("Å") +plt.ylabel("Å") + +plt.show() + +# %% +# We now extreact the stoichiometry from the input dataset using ASE's +# :py:meth:`ase.symbols.Symbols.get_chemical_formula` method. + +frames_dict = {} + +for atoms in frames_full: + chemical_formula = atoms.get_chemical_formula() + try: + frames_dict[chemical_formula] + except KeyError: + frames_dict[chemical_formula] = [] + + frames_dict[chemical_formula].append(atoms) + +# %% +# Based on the stoichiometries we create one calculation subdirectories for the +# calculations. (reftraj, input and cellfile). For our example this is only is one +# directory named ``H4O2`` because our dataset consists only of a single structure with +# two water molecules. + +mkdir_force(project_directory) + +for stoichiometry, frames in frames_dict.items(): + current_directory = f"{project_directory}/{stoichiometry}" + mkdir_force(current_directory) + + write_cp2k_in( + f"{current_directory}/in.cp2k", + project_name=project_name, + last_snapshot=len(frames), + cell=frames[0].cell.diagonal(), + ) + + ase.io.write(f"{current_directory}/init.xyz", frames[0]) + write_reftraj(f"{current_directory}/reftraj.xyz", frames) + write_cellfile(f"{current_directory}/reftraj.cell", frames) + +# %% +# Run simulations +# --------------- +# Now we have all ingredients to run the simulations. Below we call the bash script +# :download:`run_calcs.sh`. +# +# .. literalinclude:: run_calcs.sh +# :language: bash +# +# This script will loop through all stoichiometry subdirectories and call the CP2K +# engine. + +# run the bash script directly from this script +subprocess.run("bash run_calcs.sh", shell=True) + +# %% +# .. note:: +# +# For a usage on an HPC environment you can parallize the loop over the subfolders +# and submit and single job per stoichiometry. +# +# Load results +# ------------ +# After the simulation we load the results and perform a unit version from the default +# CP2K output units (Bohr and Hartree) to Å and eV. + +cflength = 0.529177210903 # Bohr -> Å +cfenergy = 27.211386245988 # Hartree -> eV +cfforce = cfenergy / cflength # Hartree/Bohr -> eV/Å + +# %% +# Finally, we store the results as :class:`ase.Atoms` in the ``new_frames`` list and +# write them to the ``project_directory`` using the ``new_fname``. Here it will be +# written to ``production/out_dft.xyz``. + +new_frames = [] + +for stoichiometry, frames in frames_dict.items(): + current_directory = f"{project_directory}/{stoichiometry}" + + frames_dft = ase.io.read(f"{current_directory}/{project_name}-pos-1.xyz", ":") + forces_dft = ase.io.read(f"{current_directory}/{project_name}-frc-1.xyz", ":") + cell_dft = np.atleast_2d(np.loadtxt(f"{current_directory}/{project_name}-1.cell"))[ + :, 2:-1 + ] + + for i_atoms, atoms in enumerate(frames_dft): + frames_ref = frames[i_atoms] + + # Check consistent positions + if not np.allclose(atoms.positions, frames_ref.positions): + raise ValueError(f"Positions in frame {i_atoms} are not the same.") + + # Check consistent cell + if not np.allclose(frames_ref.cell.flatten(), cell_dft[i_atoms]): + raise ValueError(f"Cell dimensions in frame {i_atoms} are not the same.") + + atoms.info["E"] *= cfenergy + atoms.pbc = True + atoms.cell = frames_ref.cell + atoms.set_array("forces", cfforce * forces_dft[i_atoms].positions) + + new_frames += frames_dft + +new_fname = f"{splitext(basename(write_to_file))[0]}_dft.xyz" +ase.io.write(f"{project_directory}/{new_fname}", new_frames) + +# %% +# Perform calculations using ASE calculator +# ----------------------------------------- +# Above we performed the calculations using an external bash script. ASE also provides a +# calculator class that we can use the perform the caclulations with our input file +# without a detour of writing files to disk. +# +# To use the ASE calculator together with a custom input script this requires some +# adjustments. First the name of the executable that has the exact name ``cp2k_shell``. +# We create a symlink to follow this requirement. + +try: + os.symlink(shutil.which("cp2k"), "cp2k_shell.ssmp") +except OSError: + pass + +# %% +# Next, we load the input file abd remove ``GLOBAL`` section because from it + +inp = open("./production/H4O2/in.cp2k", "r").read() +inp = re.sub( + f"{re.escape('&GLOBAL')}.*?{re.escape('&END GLOBAL')}", "", inp, flags=re.DOTALL +) + +# %% +# Afterwards we define the :py:class:`ase.calculators.cp2k.CP2K`` calculator. Note that +# we disable all parameters because we want to use all options from our input file + +calc = CP2K( + inp=inp, + max_scf=None, + cutoff=None, + xc=None, + force_eval_method=None, + basis_set=None, + pseudo_potential=None, + basis_set_file=None, + potential_file=None, + stress_tensor=False, + poisson_solver=None, + print_level=None, + command="./cp2k_shell.ssmp --shell", +) + +# %% +# We now load a new struture, add the calculator and perform the computation. + +atoms = ase.io.read("example.xyz") +atoms.set_calculator(calc) +# atoms.get_potential_energy() diff --git a/generate-gallery.py b/generate-gallery.py index 1ccb0450..9d27e630 100644 --- a/generate-gallery.py +++ b/generate-gallery.py @@ -38,6 +38,8 @@ def __init__(self, example): "examples_dirs": os.path.join(HERE, example), "gallery_dirs": gallery_dir, "min_reported_time": 60, + "copyfile_regex": r".*\.(sh|xyz|cp2k)", + "matplotlib_animations": True, } self.builder = AttrDict() diff --git a/tox.ini b/tox.ini index 85417bce..7adf4aaa 100644 --- a/tox.ini +++ b/tox.ini @@ -42,6 +42,7 @@ commands = tox -e roy_gch tox -e sample_selection tox -e gaas_map + tox -e cp2k_run_batch sphinx-build {posargs:-E} -W -b html docs/src docs/build/html @@ -66,6 +67,10 @@ commands = {[testenv]install_conda_env} {[testenv]generate_gallery} +[testenv:cp2k_run_batch] +commands = + {[testenv]install_conda_env} + {[testenv]generate_gallery} [testenv:lint] deps =