From 994a4e401412df0cb3fc127bc0da390f2daac02a Mon Sep 17 00:00:00 2001 From: "Adam J. Jackson" Date: Tue, 30 Jul 2024 17:20:43 +0100 Subject: [PATCH] Improve handling of spectral units (#311) * Create "reciprocal_spectroscopy" units context - Pint tends to choke on conversions between reciprocal energy and wavenumber. These don't come up too often but are an issue when dealing with histograms and spectra. - Here we add a few more "definitions" for Pint to use in such cases. - This creates a slightly scary range of paths around unit conversions. By putting them in a separate context, we can ensure they are only introduced when they are likely to be needed. * Use proper units for imported CASTEP DOS DOS units should be reciprocal of energy/frequency axis: this gives dimensionless area (count) and scales properly with unit changes. It looks like DOS was expressed in energy/frequency units in part to avoid some Pint conversion challenges. The new reciprocal_spectroscopy context should allow those to be done more safely and directly. * Rework broadening internal units handling - Broadening logic tries to convert to hartree and back again, but this behaves unpredictably (or otherwide badly) for non-energy units. - Instead we can use the bin units; this still requires that the relevant parameters are dimensionally-consistent _with each other_. - While working on this I noticed that very small bin values (i.e. when using large bin _units_) are incorrectly identified as having equal width due to the "atol" which is intended to stabilise comparisons near zero. Bin widths should always be finite, so we can make this stricter and always base agreement on the relative width. * Faster, more robust spectrum getters The current spectrum getters create a unit conversion factor and then multiply the raw array by it. There are two problems with this: - Multiplying any iterator by a Quantity or Unit leads to Pint checking through all the data for consistency. This can be quite expensive, and is unnecessary for these trusted array objects. When iterating over spectra yielded from a Spectrum1DCollection, the cost can become quite noticeable. - Some unit conversions in the spectroscopy context involve taking a reciprocal, e.g. conversion from cm -> 1/cm is allowed. But if this is done to a _factor_ the data itself will not be inverted. If we start with a Spectrum1D with x_data in wavenumber (1/cm) then spectrum.x_data.to("cm") and spectrum.x_data_unit = "cm" spectrum.x_data will not give the same result; the second method leaves the actual values untouched (as cm / (1/cm) = 1). * Add full set of length^-1 <-> energy conversions to rs context By providing Pint with "direct" conversion routes between wavenumber and energy (in each direction and inversion), Pint avoids unnecessarily taking the reciprocal of data and encountering divide-by-zero/infinite-value situations. It doesn't look like the existing divide-by-zero situations were causing problems with correctness of results, but its nice to clear the warnings (and save some CPU?) so that when we see this warning we know it is worth investigating. * Use parens to force reciprocal_spectroscopy application order Performance tests show this gives a significant improvement for calls to `.to()` and slightly worse performance for (the much-faster) `.ito()`. * Use a non-returning function for unit conversion check Reviewer found the ``_ =`` distracting, but it is nice to clarify here that we don't care about the return value. Switching to ``.ito()`` also achieves that (and comes with a miniscule performance advantage.) * Use toolz for a cleaner data transformation in test suite I hope to use toolz more in future, for now it is just needed for this one test! --- CHANGELOG.rst | 25 ++++++++++++++++++- euphonic/__init__.py | 5 ++++ euphonic/broadening.py | 11 ++++---- .../reciprocal_spectroscopy_definitions.txt | 9 +++++++ euphonic/readers/castep.py | 14 ++++++----- euphonic/spectra.py | 16 +++++++----- euphonic/validate.py | 3 ++- .../lzo_222_full_castep_adaptive_dos.json | 2 +- .../quartz_554_full_castep_adaptive_dos.json | 2 +- .../test/euphonic_test/test_castep_reader.py | 19 +++++++++----- .../test/euphonic_test/test_spectrum1d.py | 3 ++- tests_and_analysis/test/utils.py | 4 ++- tests_and_analysis/tox_requirements.txt | 1 + 13 files changed, 84 insertions(+), 30 deletions(-) create mode 100644 euphonic/data/reciprocal_spectroscopy_definitions.txt diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7865f58fd..320dfb901 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -24,11 +24,34 @@ - Minimum version of threadpoolctl increased from 1.0 to 3.0. -- Big fixes + - `toolz `_ is + added to the testing (tox) requirements + +- Improvements + + - A "reciprocal_spectroscopy" Pint context is made available in the + unit registry for tricky conversions between reciprocal + frequency/energy units. It is not active by default but can be + enabled with e.g. + + (10 * ureg("1 / meV")).to("cm", "reciprocal_spectroscopy") + + This can also help to avoid divide-by-zero issues when performing + energy <-> wavenumber conversions. + +- Bug fixes - Metadata strings from Castep-imported PDOS data are now converted from numpy strings to native Python strings. + - Spectra from CASTEP .phonon_dos files are now imported with units + of reciprocal energy (e.g. 1/meV) + +- Maintenance + + - Cleared up unit-conversion-related warnings, de-cluttering the + expected test suite output. + `v1.3.2 `_ ----------------------------------------------------------------------------- diff --git a/euphonic/__init__.py b/euphonic/__init__.py index eec22412b..a1215a442 100644 --- a/euphonic/__init__.py +++ b/euphonic/__init__.py @@ -1,5 +1,6 @@ from importlib.resources import files +from . import data from . import _version __version__ = _version.get_versions()['version'] @@ -8,6 +9,10 @@ # Create ureg here so it is only created once ureg = UnitRegistry() + +# Add reciprocal_spectroscopy environment used for tricky conversions +ureg.load_definitions(files(data) / "reciprocal_spectroscopy_definitions.txt") + ureg.enable_contexts('spectroscopy') Quantity = ureg.Quantity diff --git a/euphonic/broadening.py b/euphonic/broadening.py index 8e0f6339d..0cb59e450 100644 --- a/euphonic/broadening.py +++ b/euphonic/broadening.py @@ -149,14 +149,13 @@ def width_interpolated_broadening( ydata """ - conv = 1*ureg('hartree').to(bins.units) - return _width_interpolated_broadening(bins.to('hartree').magnitude, - x.to('hartree').magnitude, - widths.to('hartree').magnitude, + return _width_interpolated_broadening(bins.magnitude, + x.to(bins.units).magnitude, + widths.to(bins.units).magnitude, weights, adaptive_error, shape=shape, - fit=fit) / conv + fit=fit) / bins.units def _lorentzian(x: np.ndarray, gamma: np.ndarray) -> np.ndarray: @@ -226,7 +225,7 @@ def _width_interpolated_broadening( # bins should be regularly spaced, check that this is the case and # raise a warning if not bin_widths = np.diff(bins) - if not np.all(np.isclose(bin_widths, bin_widths[0])): + if not np.all(np.isclose(bin_widths, bin_widths[0], atol=0.)): warnings.warn('Not all bin widths are equal, so broadening by ' 'convolution will give incorrect results.', stacklevel=3) diff --git a/euphonic/data/reciprocal_spectroscopy_definitions.txt b/euphonic/data/reciprocal_spectroscopy_definitions.txt new file mode 100644 index 000000000..665af7fb1 --- /dev/null +++ b/euphonic/data/reciprocal_spectroscopy_definitions.txt @@ -0,0 +1,9 @@ +@context reciprocal_spectroscopy = rs + 1 / [frequency] <-> 1 / [length]: 1 / value / speed_of_light + 1 / [energy] -> 1 / [frequency]: planck_constant * value + 1 / [frequency] -> 1 / [energy]: value / planck_constant + [length] -> 1 / [energy]: value / (planck_constant * speed_of_light) + 1 / [energy] -> [length]: value * (planck_constant * speed_of_light) + 1 / [length] -> [energy]: value * (planck_constant * speed_of_light) + [energy] -> 1 / [length]: value / (planck_constant * speed_of_light) +@end diff --git a/euphonic/readers/castep.py b/euphonic/readers/castep.py index b170ab052..2e150c54b 100644 --- a/euphonic/readers/castep.py +++ b/euphonic/readers/castep.py @@ -102,16 +102,18 @@ def read_phonon_dos_data( data_dict['dos_bins_unit'] = frequencies_unit data_dict['dos'] = {} - data_dict['dos_unit'] = frequencies_unit - # Avoid issues in converting DOS, Pint allows - # cm^-1 -> meV but not cm -> 1/meV - dos_conv = (1*ureg('1/cm').to(frequencies_unit)).magnitude + data_dict['dos_unit'] = f"1/{frequencies_unit}" + dos_conv = ureg('1 / (1/cm)' + ).to(data_dict['dos_unit'], "reciprocal_spectroscopy" + ).magnitude + dos_dict = data_dict['dos'] - dos_dict['Total'] = dos_data[:, 1]/dos_conv + dos_dict['Total'] = dos_data[:, 1] * dos_conv + _, idx = np.unique(atom_type, return_index=True) unique_types = atom_type[np.sort(idx)] for i, species in enumerate(unique_types): - dos_dict[str(species)] = dos_data[:, i + 2]/dos_conv + dos_dict[str(species)] = dos_data[:, i + 2] * dos_conv return data_dict diff --git a/euphonic/spectra.py b/euphonic/spectra.py index 3626cd29d..6d4a89d85 100644 --- a/euphonic/spectra.py +++ b/euphonic/spectra.py @@ -1,3 +1,5 @@ +# pylint: disable=no-member + from abc import ABC, abstractmethod import collections import copy @@ -42,8 +44,8 @@ def _set_data(self, data: Quantity, attr_name: str) -> None: @property def x_data(self) -> Quantity: - return self._x_data*ureg(self._internal_x_data_unit).to( - self.x_data_unit) + return ureg.Quantity(self._x_data, self._internal_x_data_unit + ).to(self.x_data_unit, "reciprocal_spectroscopy") @x_data.setter def x_data(self, value: Quantity) -> None: @@ -52,8 +54,8 @@ def x_data(self, value: Quantity) -> None: @property def y_data(self) -> Quantity: - return self._y_data*ureg(self._internal_y_data_unit).to( - self.y_data_unit) + return ureg.Quantity(self._y_data, self._internal_y_data_unit).to( + self.y_data_unit, "reciprocal_spectroscopy") @y_data.setter def y_data(self, value: Quantity) -> None: @@ -561,6 +563,7 @@ def from_castep_phonon_dos(cls: Type[T], filename: str, else: metadata['species'] = element metadata['label'] = element + return cls(data['dos_bins']*ureg(data['dos_bins_unit']), data['dos'][element]*ureg(data['dos_unit']), metadata=metadata) @@ -1318,8 +1321,9 @@ def __init__(self, x_data: Quantity, y_data: Quantity, @property def z_data(self) -> Quantity: - return self._z_data*ureg(self._internal_z_data_unit).to( - self.z_data_unit) + return ureg.Quantity( + self._z_data, self._internal_z_data_unit + ).to(self.z_data_unit, "reciprocal_spectroscopy") @z_data.setter def z_data(self, value: Quantity) -> None: diff --git a/euphonic/validate.py b/euphonic/validate.py index 75c123870..21d38cc03 100644 --- a/euphonic/validate.py +++ b/euphonic/validate.py @@ -82,7 +82,8 @@ def _check_unit_conversion(obj: object, attr_name: str, attr_value: Any, if hasattr(obj, attr_name): if attr_name in unit_attrs: try: - _ = ureg(getattr(obj, attr_name)).to(attr_value) + ureg(getattr(obj, attr_name)).ito(attr_value, + "reciprocal_spectroscopy") except DimensionalityError: raise ValueError(( f'"{attr_value}" is not a known dimensionally-consistent ' diff --git a/tests_and_analysis/test/data/spectrum1dcollection/lzo_222_full_castep_adaptive_dos.json b/tests_and_analysis/test/data/spectrum1dcollection/lzo_222_full_castep_adaptive_dos.json index 177163969..cf86d2ce8 100644 --- a/tests_and_analysis/test/data/spectrum1dcollection/lzo_222_full_castep_adaptive_dos.json +++ b/tests_and_analysis/test/data/spectrum1dcollection/lzo_222_full_castep_adaptive_dos.json @@ -50038,5 +50038,5 @@ 0.0 ] ], - "y_data_unit": "millielectron_volt" + "y_data_unit": "1 / millielectron_volt" } diff --git a/tests_and_analysis/test/data/spectrum1dcollection/quartz_554_full_castep_adaptive_dos.json b/tests_and_analysis/test/data/spectrum1dcollection/quartz_554_full_castep_adaptive_dos.json index 88c2afcce..5bb411c04 100644 --- a/tests_and_analysis/test/data/spectrum1dcollection/quartz_554_full_castep_adaptive_dos.json +++ b/tests_and_analysis/test/data/spectrum1dcollection/quartz_554_full_castep_adaptive_dos.json @@ -12031,5 +12031,5 @@ 0.0 ] ], - "y_data_unit": "millielectron_volt" + "y_data_unit": "1 / millielectron_volt" } diff --git a/tests_and_analysis/test/euphonic_test/test_castep_reader.py b/tests_and_analysis/test/euphonic_test/test_castep_reader.py index 31867a8eb..0ae5cbdea 100644 --- a/tests_and_analysis/test/euphonic_test/test_castep_reader.py +++ b/tests_and_analysis/test/euphonic_test/test_castep_reader.py @@ -4,6 +4,7 @@ import pytest import numpy as np import numpy.testing as npt +from toolz.dicttoolz import valmap from euphonic.io import _from_json_dict from euphonic.util import direction_changed, mp_grid, get_qpoint_labels @@ -41,12 +42,18 @@ def check_dict(dct, expected_dct): check_dict(dct[exp_key], exp_val) else: assert dct[exp_key] == exp_val - # Temporary solution until test data has been regenerated - # units have changed - expected_dos_data['dos_unit'] = 'meV' - dos_conv = (1*ureg('1/cm').to('meV')).magnitude - for key, value in expected_dos_data['dos'].items(): - expected_dos_data['dos'][key] = [x/dos_conv for x in value] + + # Convert ref data from cm to 1/meV to match current behaviour + expected_dos_data['dos_unit'] = '1/meV' + + def transform(dos: list[float]) -> list[float]: + return (ureg.Quantity(dos, "cm") + .to("1 / meV", "reciprocal_spectroscopy") + .magnitude + .tolist()) + + expected_dos_data["dos"] = valmap(transform, expected_dos_data["dos"]) + check_dict(dos_data, expected_dos_data) diff --git a/tests_and_analysis/test/euphonic_test/test_spectrum1d.py b/tests_and_analysis/test/euphonic_test/test_spectrum1d.py index 30ce0952c..e2def279c 100644 --- a/tests_and_analysis/test/euphonic_test/test_spectrum1d.py +++ b/tests_and_analysis/test/euphonic_test/test_spectrum1d.py @@ -182,7 +182,8 @@ def create_from_castep_phonon_dos(self, request): pytest.lazy_fixture('create_from_constructor'), pytest.lazy_fixture('create_from_json'), pytest.lazy_fixture('create_from_dict'), - pytest.lazy_fixture('create_from_castep_phonon_dos')]) + pytest.lazy_fixture('create_from_castep_phonon_dos'), + ]) def test_correct_object_creation(self, spec1d_creator): spec1d, expected_spec1d = spec1d_creator check_spectrum1d(spec1d, expected_spec1d) diff --git a/tests_and_analysis/test/utils.py b/tests_and_analysis/test/utils.py index f30e9fc89..a3af3bd35 100644 --- a/tests_and_analysis/test/utils.py +++ b/tests_and_analysis/test/utils.py @@ -195,7 +195,9 @@ def check_unit_conversion(obj: object, attr: str, unit: str) -> None: else: assert getattr(obj, attr).units == ureg(unit) npt.assert_allclose(getattr(obj, attr).magnitude, - original_attr_value.to(unit).magnitude) + (original_attr_value + .to(unit, "reciprocal_spectroscopy") + .magnitude)) def check_property_setters(obj: object, attr: str, unit: str, diff --git a/tests_and_analysis/tox_requirements.txt b/tests_and_analysis/tox_requirements.txt index 4f4d53206..d7f9003d3 100644 --- a/tests_and_analysis/tox_requirements.txt +++ b/tests_and_analysis/tox_requirements.txt @@ -5,3 +5,4 @@ pytest-mock pytest-lazy-fixture pytest-xvfb python-slugify +toolz