diff --git a/examples/heat-capacity/data/input.xml b/examples/heat-capacity/data/input.xml
index f08ca8a4..935b1b9a 100644
--- a/examples/heat-capacity/data/input.xml
+++ b/examples/heat-capacity/data/input.xml
@@ -2,7 +2,7 @@
- 1000
+ 5000
32342
diff --git a/examples/heat-capacity/heat-capacity.py b/examples/heat-capacity/heat-capacity.py
index 21f6f49f..6d22efa0 100644
--- a/examples/heat-capacity/heat-capacity.py
+++ b/examples/heat-capacity/heat-capacity.py
@@ -47,13 +47,6 @@
xml_content = file.read()
print(xml_content)
-# %%
-# NB1: In a realistic simulation you will want to increase the field
-# ``total_steps``, to simulate at least a few 100s of picoseconds.
-#
-# NB2: To converge a simulation of water at room temperature, you
-# typically need at least 32 beads.
-
# %%
# We launch the i-PI and LAMMPS processes, exactly as in the
# ``path-integrals`` example.
@@ -90,10 +83,10 @@
ax.set_ylabel(r"energy / eV")
ax.legend()
plt.show()
+plt.close()
# %%
-# We will now plot the values of the energy and heat capacity estimators
-# as a function of time. As described in the ``i-PI``
+# As described in the ``i-PI``
# ,
# the two quantities returned by the ``scaledcoords`` output are ``eps_v``
# and ``eps_v'``, defined in the aforementioned
@@ -103,15 +96,15 @@
#
# First the energy:
-eps_v = np.loadtxt("simulation.out")[20:, 6]
-eps_v_prime = np.loadtxt("simulation.out")[20:, 7]
-# discarding the first 20 steps which are highly non-equilibrated
+eps_v = np.loadtxt("simulation.out")[100:, 6]
+eps_v_prime = np.loadtxt("simulation.out")[100:, 7]
+# discarding the first 100 steps which are highly non-equilibrated
energy_estimator = eps_v # from the paper
fix, ax = plt.subplots(1, 1, figsize=(4, 3), constrained_layout=True)
ax.plot(
- output_data["time"][20:],
+ output_data["time"][100:],
energy_estimator,
"b",
label="Total energy$",
@@ -120,6 +113,7 @@
ax.set_ylabel(r"$E / a.u.$")
ax.legend()
plt.show()
+plt.close()
# %%
# And, finally, the heat capacity:
@@ -132,4 +126,89 @@
heat_capacity = (
kB * (beta**2) * (np.mean(eps_v**2) - np.mean(eps_v) ** 2 - np.mean(eps_v_prime))
)
-print(f"Heat capacity: {heat_capacity:.2f} a.u.")
+heat_capacity_per_molecule = heat_capacity / 32 # 32 molecules in the simulation
+print(f"Heat capacity (per water molecule): {(heat_capacity_per_molecule/kB):.2f} kB")
+
+
+# %%
+# Finding an error estimate
+# -------------------------
+#
+# Especially with such an underconverged simulation, it is important to
+# estimate the error in the heat capacity.
+#
+# Generally, errors on measurements are computed
+# as "standard errors", i.e. the standard deviation of a series of data points
+# divided by the square root of the number of data points. In our case,
+# however, this is made more complicated by the correlation between
+# close steps in the molecular dynamics trajectory, which would lead to an
+# overestimation of the number of independent samples. To fix this, we can
+# calculate the autocorrelation time of the estimators whose errors we
+# want to estimate, and apply a correction factor to the number of samples.
+
+
+def autocorrelate(x):
+ n = len(x)
+ xo = x - x.mean() # remove mean
+ acov = (np.correlate(xo, xo, "full"))[n - 1 :]
+ return acov[: len(acov) // 2]
+
+
+def autocorrelation_time(x):
+ acov = autocorrelate(x)
+ return 1.0 + np.sum(acov) / acov[0]
+
+
+# %%
+# We can now calculate the error on the heat capacity estimate.
+
+# Autocorrelation times (i.e. number of steps needed to have independent samples)
+autocorr_time_error_eps_v = autocorrelation_time(eps_v)
+autocorr_time_error_eps_v_squared = autocorrelation_time(eps_v**2)
+autocorr_time_error_eps_v_prime = autocorrelation_time(eps_v_prime)
+
+# Effective number of samples
+effective_samples_eps_v = len(eps_v) / autocorr_time_error_eps_v
+effective_samples_eps_v_squared = len(eps_v) / autocorr_time_error_eps_v_squared
+effective_samples_eps_v_prime = len(eps_v) / autocorr_time_error_eps_v_prime
+
+# Standard errors using the effective number of samples
+error_eps_v = np.std(eps_v) / np.sqrt(effective_samples_eps_v)
+error_eps_v_squared = np.std(eps_v**2) / np.sqrt(effective_samples_eps_v_squared)
+error_eps_v_prime = np.std(eps_v_prime) / np.sqrt(effective_samples_eps_v_prime)
+
+# Error on the heat capacity
+error_heat_capacity = (
+ kB
+ * (beta**2)
+ * np.sqrt( # error propagation in the sum of terms
+ error_eps_v_squared**2
+ + (2.0 * np.mean(eps_v) * error_eps_v) ** 2 # error of **2
+ + error_eps_v_prime**2
+ )
+)
+
+error_heat_capacity_per_molecule = (
+ error_heat_capacity / 32
+) # 32 molecules in the simulation
+
+print(
+ "Error on the heat capacity (per water molecule): "
+ f"{(error_heat_capacity_per_molecule/kB):.2f} kB"
+)
+
+
+# %%
+# The obtained heat capacity is consistent with the values from the literature
+# (see ...).
+# However, the error is quite large, which is expected given the short simulation time.
+# To reduce the error, one would need to run a longer simulation. Other important error
+# sources, which are not accounted for in the error estimate, are the finite size of the
+# system and number of beads. Both of these are too small in this example to give
+# reliable results.
+#
+# In a realistic simulation, up to a few 100s of picoseconds might be needed to reduce
+# the sampling error to a small value (1-10% of the heat capacity). For water at room
+# temperature, you will need 32 beads at the very least (8 were used in this example).
+# It is more difficult to give a general rule for the system size, but a few hundred
+# water molecules would be a reasonable guess (32 were used in this example).