From 683f21cb5d2f4a5a6865ff740ed3b1295efac8ab Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Sat, 21 Sep 2024 06:56:49 +0200 Subject: [PATCH] Error estimates --- examples/heat-capacity/data/input.xml | 2 +- examples/heat-capacity/heat-capacity.py | 107 ++++++++++++++++++++---- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/examples/heat-capacity/data/input.xml b/examples/heat-capacity/data/input.xml index f08ca8a4..935b1b9a 100644 --- a/examples/heat-capacity/data/input.xml +++ b/examples/heat-capacity/data/input.xml @@ -2,7 +2,7 @@ [ step, time{picosecond}, conserved{electronvolt}, temperature{kelvin}, kinetic_cv{electronvolt}, potential{electronvolt}, scaledcoords(fd_delta=5e-3) ] - 1000 + 5000 32342 diff --git a/examples/heat-capacity/heat-capacity.py b/examples/heat-capacity/heat-capacity.py index 21f6f49f..6d22efa0 100644 --- a/examples/heat-capacity/heat-capacity.py +++ b/examples/heat-capacity/heat-capacity.py @@ -47,13 +47,6 @@ xml_content = file.read() print(xml_content) -# %% -# NB1: In a realistic simulation you will want to increase the field -# ``total_steps``, to simulate at least a few 100s of picoseconds. -# -# NB2: To converge a simulation of water at room temperature, you -# typically need at least 32 beads. - # %% # We launch the i-PI and LAMMPS processes, exactly as in the # ``path-integrals`` example. @@ -90,10 +83,10 @@ ax.set_ylabel(r"energy / eV") ax.legend() plt.show() +plt.close() # %% -# We will now plot the values of the energy and heat capacity estimators -# as a function of time. As described in the ``i-PI`` +# As described in the ``i-PI`` # , # the two quantities returned by the ``scaledcoords`` output are ``eps_v`` # and ``eps_v'``, defined in the aforementioned @@ -103,15 +96,15 @@ # # First the energy: -eps_v = np.loadtxt("simulation.out")[20:, 6] -eps_v_prime = np.loadtxt("simulation.out")[20:, 7] -# discarding the first 20 steps which are highly non-equilibrated +eps_v = np.loadtxt("simulation.out")[100:, 6] +eps_v_prime = np.loadtxt("simulation.out")[100:, 7] +# discarding the first 100 steps which are highly non-equilibrated energy_estimator = eps_v # from the paper fix, ax = plt.subplots(1, 1, figsize=(4, 3), constrained_layout=True) ax.plot( - output_data["time"][20:], + output_data["time"][100:], energy_estimator, "b", label="Total energy$", @@ -120,6 +113,7 @@ ax.set_ylabel(r"$E / a.u.$") ax.legend() plt.show() +plt.close() # %% # And, finally, the heat capacity: @@ -132,4 +126,89 @@ heat_capacity = ( kB * (beta**2) * (np.mean(eps_v**2) - np.mean(eps_v) ** 2 - np.mean(eps_v_prime)) ) -print(f"Heat capacity: {heat_capacity:.2f} a.u.") +heat_capacity_per_molecule = heat_capacity / 32 # 32 molecules in the simulation +print(f"Heat capacity (per water molecule): {(heat_capacity_per_molecule/kB):.2f} kB") + + +# %% +# Finding an error estimate +# ------------------------- +# +# Especially with such an underconverged simulation, it is important to +# estimate the error in the heat capacity. +# +# Generally, errors on measurements are computed +# as "standard errors", i.e. the standard deviation of a series of data points +# divided by the square root of the number of data points. In our case, +# however, this is made more complicated by the correlation between +# close steps in the molecular dynamics trajectory, which would lead to an +# overestimation of the number of independent samples. To fix this, we can +# calculate the autocorrelation time of the estimators whose errors we +# want to estimate, and apply a correction factor to the number of samples. + + +def autocorrelate(x): + n = len(x) + xo = x - x.mean() # remove mean + acov = (np.correlate(xo, xo, "full"))[n - 1 :] + return acov[: len(acov) // 2] + + +def autocorrelation_time(x): + acov = autocorrelate(x) + return 1.0 + np.sum(acov) / acov[0] + + +# %% +# We can now calculate the error on the heat capacity estimate. + +# Autocorrelation times (i.e. number of steps needed to have independent samples) +autocorr_time_error_eps_v = autocorrelation_time(eps_v) +autocorr_time_error_eps_v_squared = autocorrelation_time(eps_v**2) +autocorr_time_error_eps_v_prime = autocorrelation_time(eps_v_prime) + +# Effective number of samples +effective_samples_eps_v = len(eps_v) / autocorr_time_error_eps_v +effective_samples_eps_v_squared = len(eps_v) / autocorr_time_error_eps_v_squared +effective_samples_eps_v_prime = len(eps_v) / autocorr_time_error_eps_v_prime + +# Standard errors using the effective number of samples +error_eps_v = np.std(eps_v) / np.sqrt(effective_samples_eps_v) +error_eps_v_squared = np.std(eps_v**2) / np.sqrt(effective_samples_eps_v_squared) +error_eps_v_prime = np.std(eps_v_prime) / np.sqrt(effective_samples_eps_v_prime) + +# Error on the heat capacity +error_heat_capacity = ( + kB + * (beta**2) + * np.sqrt( # error propagation in the sum of terms + error_eps_v_squared**2 + + (2.0 * np.mean(eps_v) * error_eps_v) ** 2 # error of **2 + + error_eps_v_prime**2 + ) +) + +error_heat_capacity_per_molecule = ( + error_heat_capacity / 32 +) # 32 molecules in the simulation + +print( + "Error on the heat capacity (per water molecule): " + f"{(error_heat_capacity_per_molecule/kB):.2f} kB" +) + + +# %% +# The obtained heat capacity is consistent with the values from the literature +# (see ...). +# However, the error is quite large, which is expected given the short simulation time. +# To reduce the error, one would need to run a longer simulation. Other important error +# sources, which are not accounted for in the error estimate, are the finite size of the +# system and number of beads. Both of these are too small in this example to give +# reliable results. +# +# In a realistic simulation, up to a few 100s of picoseconds might be needed to reduce +# the sampling error to a small value (1-10% of the heat capacity). For water at room +# temperature, you will need 32 beads at the very least (8 were used in this example). +# It is more difficult to give a general rule for the system size, but a few hundred +# water molecules would be a reasonable guess (32 were used in this example).