Updated heat capacity example to my satisfaction

lab-cosmo · Sep 21, 2024 · 77597cf · 77597cf
1 parent 4b3466c
commit 77597cf
Show file tree

Hide file tree

Showing 5 changed files with 125 additions and 67 deletions.
diff --git a/examples/heat-capacity/data/in.lmp b/examples/heat-capacity/data/in.lmp
@@ -3,7 +3,7 @@ atom_style	full
 
 pair_style      lj/cut/tip4p/long 1 2 1 1 0.278072379 17.001
 # high-pppm precision and shift to get meaningful fd estimates
-kspace_style	pppm/tip4p 1e-6
+kspace_style	pppm/tip4p 1e-5
 pair_modify     shift yes
 bond_style      class2
 angle_style     harmonic

diff --git a/examples/heat-capacity/data/input.xml b/examples/heat-capacity/data/input.xml
@@ -1,6 +1,9 @@
 <simulation verbosity='medium' safe_stride='100'>
-  <output prefix='simulation'>
-      <properties filename='out'>  [ step, time{picosecond}, conserved, potential, kinetic_cv, scaledcoords(fd_delta=5e-3) ] </properties>
+  <output prefix='water-cv'>
+      <properties filename='out' stride='4'>  
+         [ step, time{picosecond}, conserved, potential, kinetic_cv, 
+           scaledcoords(fd_delta=5e-3) ] 
+      </properties>
   </output>
   <total_steps> 2000 </total_steps>
   <prng>

diff --git a/examples/heat-capacity/heat-capacity.py b/examples/heat-capacity/heat-capacity.py
@@ -2,7 +2,9 @@
 Quantum heat capacity of water
 ==============================
 
-:Authors: Michele Ceriotti `@ceriottm <https://github.com/ceriottm/>`_
+:Authors:
+Filippo Bigi `@frostedoyster <https://github.com/frostedoyster/>`_;
+Michele Ceriotti `@ceriottm <https://github.com/ceriottm/>`_
 
 This example shows how to estimate the heat capacity of liquid water
 from a path integral molecular dynamics simulation. The dynamics are
@@ -12,64 +14,90 @@
 model <http://doi.org/10.1063/1.3167790>`_.
 """
 
+import os
 import subprocess
 import time
+import xml.etree.ElementTree as ET
 
 import ipi
 import matplotlib.pyplot as plt
 import numpy as np
 
 
 # %%
-# A non-trivial estimator
-# -----------------------
+# A non-trivial energy estimator
+# ------------------------------
 #
-# As introduced in the ``path-integrals`` example, path-integral estimators
+# As introduced in the
+# `path-integrals example
+# <http://lab-cosmo.github.io/atomistic-cookbook/latest/examples/path-integrals>`_,
+# path-integral estimators
 # for observables that depend on momenta are generally not trivial to compute.
 #
-# In this example, we will focus on the heat capacity, which is one such
+# In this example, we will focus on the constant-volume heat capacity,
+# :math:`c_V`, which is one such
 # observable, and we will calculate it for liquid water at room temperature.
+# Because of the presence of high-frequency vibrations, many of the nuclear
+# degrees of freedom are trapped in the vibrational ground state, which reduces
+# substantially the heat capacity from the value that would be obtained
+# in the classical limit. See `this review
+# <http://doi.org/10.1021/acs.chemrev.5b00674>`_
+# for an overview of the impact of quantum nuclei on the properties of water.
+# From a computational perspective, this means that it is necessary to use
+# specialized simulations and estimators to evaluate the correct value.
 
 
 # %%
 # Running the PIMD calculation
-# ----------------------------
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # This follows the same steps as the ``path-integrals`` example. One important
 # difference is that we will request the ``scaledcoords`` output to the relevant
 # section of the ``i-PI`` input XML file, which
 # contains estimators that can be used to calculate the total energy and
-# heat capacity as defined in this `paper <https://arxiv.org/abs/physics/0505109>`_.
+# heat capacity as following
+# `Yamamoto, J. Chem. Phys. (2005) <https://arxiv.org/abs/physics/0505109>`_.
 #
-# The input file is shown below. It should be noted that the ``scaledcoords``
+# The input file is shown below. It should be noted that ``scaledcoords``
 # is given a finite differences displacement as a parameter. This is necessary
 # as the estimators require higher order derivatives of the potential energy,
-# which are calculated using finite differences.
-
-# Open and show the XML file
-with open("data/input.xml", "r") as file:
-    xml_content = file.read()
-print(xml_content)
+# which are calculated using finite differences. This also means that
+# evaluating the estimator adds substantial overhead (so it is wise to only
+# compute it every few simulation steps, to eliminate correlations between
+# snapshots) and that one should be careful to use well-converged simulation
+# parameters to avoid discontinuities and noise (for instance, we increase
+# the accuracy of the particle-mesh electrostatic calculation, and use a
+# shifted Lennard-Jones potential to avoid a discontinuity at the cutoff).
+
+# Open and show the relevant part of the input
+xmlroot = ET.parse("data/input.xml").getroot()
+print("      " + ET.tostring(xmlroot.find(".//properties"), encoding="unicode"))
 
 # %%
 # We launch the i-PI and LAMMPS processes, exactly as in the
 # ``path-integrals`` example.
 
-ipi_process = subprocess.Popen(["i-pi", "data/input.xml"])
-time.sleep(2)  # wait for i-PI to start
-lmp_process = [subprocess.Popen(["lmp", "-in", "data/in.lmp"]) for i in range(2)]
-
-ipi_process.wait()
-lmp_process[0].wait()
-lmp_process[1].wait()
-
-output_data, output_desc = ipi.read_output("simulation.out")
+# don't rerun if the outputs already exist
+ipi_process = None
+if not os.path.exists("water-cv.out"):
+    ipi_process = subprocess.Popen(["i-pi", "data/input.xml"])
+    time.sleep(2)  # wait for i-PI to start
+    lmp_process = [subprocess.Popen(["lmp", "-in", "data/in.lmp"]) for i in range(2)]
 
+# %%
+# Skip this cell if you want to run in the background
+if ipi_process is not None:
+    ipi_process.wait()
+    lmp_process[0].wait()
+    lmp_process[1].wait()
 
 # %%
+# Analyzing the results
+# ~~~~~~~~~~~~~~~~~~~~~
 # Let's plot the potential and conserved energy as a function of time,
 # just to check that the simulation ran sensibly.
 
+output_data, output_desc = ipi.read_output("water-cv.out")
 fix, ax = plt.subplots(1, 1, figsize=(4, 3), constrained_layout=True)
 ax.plot(
     output_data["time"],
@@ -84,19 +112,18 @@
     label="Conserved, $H$",
 )
 ax.set_xlabel(r"$t$ / ps")
-ax.set_ylabel(r"energy / eV")
+ax.set_ylabel(r"energy / a.u.")
 ax.legend()
 plt.show()
-plt.close()
 
 # %%
-# As described in the ``i-PI``
-# <documentation https://ipi-code.org/i-pi/output-tags.html>,
+# As described in the `i-PI documentation
+# <https://ipi-code.org/i-pi/output-tags.html>`_,
 # the two quantities returned by the ``scaledcoords`` output are ``eps_v``
 # and ``eps_v'``, defined in the aforementioned
 # `paper <https://arxiv.org/abs/physics/0505109>`_.
 #
-# These estimators (:math:`\eps_v` and :math:`\eps_v'`) are derived in the
+# These estimators (:math:`\epsilon_v` and :math:`\epsilon_v'`) are derived in the
 # "scaled coordinates" formalism, which is a useful trick to avoid the
 # growth of the error in the instantaneous values of the estimators with
 # the number of beads used in the path integral simulation.
@@ -105,26 +132,27 @@
 # heat capacity from these estimators:
 #
 # .. math::
-#   E = \langle \eps_v \rangle
-#   C_V = k_B \beta^2 \left( \langle \eps_v^2 \rangle - \langle
-#       \eps_v \rangle^2 - \langle \eps_v' \rangle \right)
+#   E = \langle \epsilon_v \rangle \quad
+#   C_V = k_B \beta^2 \left( \langle \epsilon_v^2 \rangle - \langle
+#       \epsilon_v \rangle^2 - \langle \epsilon_v' \rangle \right)
 #
 # First the energy, whose estimator will be compared to the total energy
 # calculated as the sum of the potential and kinetic energy estimators.
 # Since the kinetic energy is itself calculated from a scaled-coordinates
 # estimator (the "centroid virial" estimator), the two total energies are
 # the same.
 
-eps_v = np.loadtxt("simulation.out")[:, 5]
-eps_v_prime = np.loadtxt("simulation.out")[:, 6]
+eps_v = output_data["scaledcoords(fd_delta=5e-3)"][:, 0]
+eps_v_prime = output_data["scaledcoords(fd_delta=5e-3)"][:, 1]
 
 energy_estimator = eps_v  # first formula
 
 fix, ax = plt.subplots(1, 1, figsize=(4, 3), constrained_layout=True)
 ax.plot(
     output_data["time"],
     energy_estimator - energy_estimator[0],
-    label="Total energy (scaled coordinates estimator)",
+    "b-",
+    label="scaled coordinates estimator",
 )
 ax.plot(
     output_data["time"][:],
@@ -134,35 +162,51 @@
         + output_data["kinetic_cv"]
         - output_data["kinetic_cv"][0]
     ),
-    label="Total energy (potential + kinetic)",
+    "r.",
+    label="potential + virial kinetic",
 )
 ax.set_xlabel(r"$t$ / ps")
-ax.set_ylabel(r"$E / a.u.$")
+ax.set_ylabel(r"total energy / a.u.")
 ax.legend()
 plt.show()
 
 
 # %%
-# And, finally, the heat capacity:
+# And, finally, the heat capacity. Note that normally the simulation
+# requires a few ps for equilibration. Here we discard a few dozen steps
+# to eliminate the initial jump, which is due to the relaxation of the
+# ring polymers starting from a single atomic configuration.
 
 # i-PI scaledcoords outputs are in atomic units (see docs)
 kB = 3.16681e-6  # Boltzmann constant in atomic units
 T = 298.0  # temperature in K, as defined in the input file
 beta = 1.0 / (kB * T)
 
+skip = 20
 heat_capacity = (  # second formula
-    kB * (beta**2) * (np.mean(eps_v**2) - np.mean(eps_v) ** 2 - np.mean(eps_v_prime))
+    kB
+    * (beta**2)
+    * (
+        np.mean(eps_v[skip:] ** 2)
+        - np.mean(eps_v[skip:]) ** 2
+        - np.mean(eps_v_prime[skip:])
+    )
 )
 heat_capacity_per_molecule = heat_capacity / 32  # 32 molecules in the simulation
 print(f"Heat capacity (per water molecule): {(heat_capacity_per_molecule/kB):.2f} kB")
 
+# %%
+# You may recognize that the first part of the estimator is reminiscent
+# of the classical estimator for the heat capacity as the fluctuations of the
+# (quantum) total energy, which in this case however requires a correction given
+# by the mean of the second part of the scaled-coordinates estimator.
 
 # %%
-# Finding an error estimate
-# -------------------------
+# Estimating the statistical error
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # Especially with such an underconverged simulation, it is important to
-# estimate the error in the heat capacity.
+# estimate the statistical error in the heat capacity.
 #
 # Generally, errors on measurements are computed
 # as "standard errors", i.e. the standard deviation of a series of data points
@@ -187,32 +231,38 @@ def autocorrelation_time(x):
 
 
 # %%
-# We can now calculate the error on the heat capacity estimate.
+# Using these helper functions, we can now calculate the error on the various
+# parts of the heat capacity estimator. Note also the autocorrelation times, that
+# are just a little larger than one, indicating that the stride used to print out
+# the estimators is appropriate (as there is little correlation between the samples).
 
 # Autocorrelation times (i.e. number of steps needed to have independent samples)
-autocorr_time_error_eps_v = autocorrelation_time(eps_v)
-autocorr_time_error_eps_v_squared = autocorrelation_time(eps_v**2)
-autocorr_time_error_eps_v_prime = autocorrelation_time(eps_v_prime)
+autocorr_time_error_delta_eps_v = autocorrelation_time(
+    (eps_v[skip:] - eps_v[skip:].mean()) ** 2
+)
+autocorr_time_error_eps_v_prime = autocorrelation_time(eps_v_prime[skip:])
+
+print(
+    f"""
+Autocorrelation times (in number of samples):
+(eps-<eps>)^2: {autocorr_time_error_delta_eps_v:.2f}
+eps':          {autocorr_time_error_eps_v_prime:.2f}
+"""
+)
 
 # Effective number of samples
-effective_samples_eps_v = len(eps_v) / autocorr_time_error_eps_v
-effective_samples_eps_v_squared = len(eps_v) / autocorr_time_error_eps_v_squared
-effective_samples_eps_v_prime = len(eps_v) / autocorr_time_error_eps_v_prime
+effective_samples_delta_eps_v = len(eps_v[skip:]) / autocorr_time_error_delta_eps_v
+effective_samples_eps_v_prime = len(eps_v[skip:]) / autocorr_time_error_eps_v_prime
 
 # Standard errors using the effective number of samples
-error_eps_v = np.std(eps_v) / np.sqrt(effective_samples_eps_v)
-error_eps_v_squared = np.std(eps_v**2) / np.sqrt(effective_samples_eps_v_squared)
-error_eps_v_prime = np.std(eps_v_prime) / np.sqrt(effective_samples_eps_v_prime)
+error_delta_eps_v = np.std((eps_v[skip:] - eps_v[skip:].mean()) ** 2) / np.sqrt(
+    effective_samples_delta_eps_v
+)
+error_eps_v_prime = np.std(eps_v_prime[skip:]) / np.sqrt(effective_samples_eps_v_prime)
 
-# Error on the heat capacity
+# Error on the heat capacity (assuming quadrature sum)
 error_heat_capacity = (
-    kB
-    * (beta**2)
-    * np.sqrt(  # error propagation in the sum of terms
-        error_eps_v_squared**2
-        + (2.0 * np.mean(eps_v) * error_eps_v) ** 2  # error of <eps_v>**2
-        + error_eps_v_prime**2
-    )
+    kB * (beta**2) * np.sqrt(error_delta_eps_v**2 + error_eps_v_prime**2)
 )
 
 error_heat_capacity_per_molecule = (
@@ -227,15 +277,20 @@ def autocorrelation_time(x):
 
 # %%
 # The obtained heat capacity is consistent with the values from the literature
-# (see ...).
+# (see e.g. `Ceriotti et al., J. Chem. Phys. (2011)
+# <http://doi.org/10.1063/1.3556661>`_
+# where the convergence of the heat capacity with number of beads is shown for the same
+# water model used in this example).
 # However, the error is quite large, which is expected given the short simulation time.
 # To reduce the error, one would need to run a longer simulation. Other important error
 # sources, which are not accounted for in the error estimate, are the finite size of the
 # system and number of beads. Both of these are too small in this example to give
 # reliable results.
 #
 # In a realistic simulation, up to a few 100s of picoseconds might be needed to reduce
-# the sampling error to a small value (1-10% of the heat capacity). For water at room
+# the sampling error to a small value (1-2% of the heat capacity). For water at room
 # temperature, you will need 32 beads at the very least (8 were used in this example).
-# It is more difficult to give a general rule for the system size, but a few hundred
-# water molecules would be a reasonable guess (32 were used in this example).
+# It is more difficult to give a general rule for the system size: (quantum) energy
+# fluctuations are usually localized, but to guarantee accurate sampling of the
+# liquid structure, a few hundred water molecules would be a reasonable guess
+# (32 were used in this example).
diff --git a/examples/pi-metad/pi-metad.py b/examples/pi-metad/pi-metad.py
@@ -467,7 +467,7 @@ def moving_average(arr, window_size):
 # -----------------------------------
 #
 # You can see `this recipe
-# <http://lab-cosmo.github.io/atomistic-cookbook/examples/path-integrals>`_
+# <http://lab-cosmo.github.io/atomistic-cookbook/examples/latest/path-integrals>`_
 # for a brief introduction to path integral simulations with `i-PI`.
 # From a practical perspective, very little needs to change with respect
 # to the classical case.

diff --git a/src/__pycache__/get_examples.cpython-312.pyc b/src/__pycache__/get_examples.cpython-312.pyc