From 34ae13c5701597e4dc03adfdc373bd971a9445a4 Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Wed, 3 Jan 2024 18:47:13 -0800 Subject: [PATCH 01/12] first commit to numba-fy the findDuplicateVectors routine. Function is 3x faster after first compile. --- hexrd/matrixutil.py | 80 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index 9ee43b5a4..757a35f83 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -32,11 +32,12 @@ from scipy import sparse +from hexrd.utils.decorators import numba_njit_if_available from hexrd import constants from hexrd.constants import USE_NUMBA if USE_NUMBA: import numba - + from numba import prange # module variables sqr6i = 1./np.sqrt(6.) @@ -682,6 +683,83 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): return eqv, uid +def findDuplicateVectors_numba(vec, tol=vTol, equivPM=False): + + eqv = _findduplicatevectors(vec, tol, equivPM) + uid = np.arange(0, vec.shape[1], dtype=np.int64) + idx = eqv[~np.isnan(eqv)].astype(np.int64) + uid2 = np.delete(uid, idx) + + + return eqv, uid2 + +@numba_njit_if_available(cache=True, nogil=True, parallel=True) +def _findduplicatevectors(vec, tol, equivPM): + """ + Find vectors in an array that are equivalent to within + a specified tolerance. code is accelerated by numba + + USAGE: + + eqv = DuplicateVectors(vec, *tol) + + INPUT: + + 1) vec is n x m, a double array of m horizontally concatenated + n-dimensional vectors. + *2) tol is 1 x 1, a scalar tolerance. If not specified, the default + tolerance is 1e-14. + *3) set equivPM to True if vec and -vec + are to be treated as equivalent + + OUTPUT: + + 1) eqv is 1 x p, a list of p equivalence relationships. + + NOTES: + + Each equivalence relationship is a 1 x q vector of indices that + represent the locations of duplicate columns/entries in the array + vec. For example: + + | 1 2 2 2 1 2 7 | + vec = | | + | 2 3 5 3 2 3 3 | + + eqv = [[1x2 double] [1x3 double]], where + + eqv[0] = [0 4] + eqv[1] = [1 3 5] + """ + + if equivPM: + vec2 = np.abs(vec.copy()) + + n = vec.shape[0]; m = vec.shape[1] + + eqv = np.ones_like(vec) + eqv[:] = np.nan + + for ii in prange(m): + ctr = 0 + eqv_elem = np.zeros((m, ), dtype=np.int64) + + for jj in prange(m): + if jj > ii: + + if equivPM: + diff = np.sum(np.abs(vec[:,ii]-vec2[:,jj])) + else: + diff = np.sum(np.abs(vec[:,ii]-vec[:,jj])) + + if diff < tol: + eqv_elem[ctr] = jj + ctr += 1 + + for kk in range(ctr): + eqv[ii, kk] = eqv_elem[kk] + + return eqv def normvec(v): mag = np.linalg.norm(v) From 30f9bde596cba24457cf678f54cbbe481db0ff10 Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Wed, 3 Jan 2024 19:18:00 -0800 Subject: [PATCH 02/12] Tested. Most likely found a bug in existing code. --- hexrd/matrixutil.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index 757a35f83..93f5bdf32 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -688,10 +688,13 @@ def findDuplicateVectors_numba(vec, tol=vTol, equivPM=False): eqv = _findduplicatevectors(vec, tol, equivPM) uid = np.arange(0, vec.shape[1], dtype=np.int64) idx = eqv[~np.isnan(eqv)].astype(np.int64) - uid2 = np.delete(uid, idx) - - - return eqv, uid2 + uid2 = list(np.delete(uid, idx)) + eqv2 = [] + for ii in range(eqv.shape[1]): + v = eqv[ii, ~np.isnan(eqv[ii, :])] + if v.shape[0] > 0: + eqv2.append([ii] + list(v.astype(np.int64))) + return eqv2, uid2 @numba_njit_if_available(cache=True, nogil=True, parallel=True) def _findduplicatevectors(vec, tol, equivPM): @@ -737,7 +740,7 @@ def _findduplicatevectors(vec, tol, equivPM): n = vec.shape[0]; m = vec.shape[1] - eqv = np.ones_like(vec) + eqv = np.zeros((m, m), dtype=np.float64) eqv[:] = np.nan for ii in prange(m): From c877ac61fbc84bb2152c5707ed9ea8682269b8af Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Thu, 4 Jan 2024 10:06:42 -0800 Subject: [PATCH 03/12] Use nan mask only once. --- hexrd/matrixutil.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index 93f5bdf32..2066ced85 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -690,8 +690,9 @@ def findDuplicateVectors_numba(vec, tol=vTol, equivPM=False): idx = eqv[~np.isnan(eqv)].astype(np.int64) uid2 = list(np.delete(uid, idx)) eqv2 = [] + mask = ~np.isnan(eqv) for ii in range(eqv.shape[1]): - v = eqv[ii, ~np.isnan(eqv[ii, :])] + v = eqv[ii, mask[ii, :]] if v.shape[0] > 0: eqv2.append([ii] + list(v.astype(np.int64))) return eqv2, uid2 From 8b51fcff02490f4a53d1322ae969ee4b0e0f522b Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Thu, 4 Jan 2024 10:45:21 -0800 Subject: [PATCH 04/12] rename function to use numba one across library. --- hexrd/matrixutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index 2066ced85..c1dfadd18 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -583,7 +583,7 @@ def uniqueVectors(v, tol=1.0e-12): return vSrt[:, ivInd[0:nUniq]] -def findDuplicateVectors(vec, tol=vTol, equivPM=False): +def findDuplicateVectors_old(vec, tol=vTol, equivPM=False): """ Find vectors in an array that are equivalent to within a specified tolerance @@ -683,7 +683,7 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): return eqv, uid -def findDuplicateVectors_numba(vec, tol=vTol, equivPM=False): +def findDuplicateVectors(vec, tol=vTol, equivPM=False): eqv = _findduplicatevectors(vec, tol, equivPM) uid = np.arange(0, vec.shape[1], dtype=np.int64) From 46011b7351efa0618a65800e24a77ce01f95568c Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Thu, 4 Jan 2024 10:59:36 -0800 Subject: [PATCH 05/12] PEP8 --- hexrd/matrixutil.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index c1dfadd18..bf675fb95 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -684,7 +684,6 @@ def findDuplicateVectors_old(vec, tol=vTol, equivPM=False): return eqv, uid def findDuplicateVectors(vec, tol=vTol, equivPM=False): - eqv = _findduplicatevectors(vec, tol, equivPM) uid = np.arange(0, vec.shape[1], dtype=np.int64) idx = eqv[~np.isnan(eqv)].astype(np.int64) @@ -697,6 +696,7 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): eqv2.append([ii] + list(v.astype(np.int64))) return eqv2, uid2 + @numba_njit_if_available(cache=True, nogil=True, parallel=True) def _findduplicatevectors(vec, tol, equivPM): """ @@ -739,7 +739,8 @@ def _findduplicatevectors(vec, tol, equivPM): if equivPM: vec2 = np.abs(vec.copy()) - n = vec.shape[0]; m = vec.shape[1] + n = vec.shape[0] + m = vec.shape[1] eqv = np.zeros((m, m), dtype=np.float64) eqv[:] = np.nan @@ -752,9 +753,9 @@ def _findduplicatevectors(vec, tol, equivPM): if jj > ii: if equivPM: - diff = np.sum(np.abs(vec[:,ii]-vec2[:,jj])) + diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) else: - diff = np.sum(np.abs(vec[:,ii]-vec[:,jj])) + diff = np.sum(np.abs(vec[:, ii]-vec[:, jj])) if diff < tol: eqv_elem[ctr] = jj From a94d5cd6213494fa4700969e8df75dfa8d63c577 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Thu, 4 Jan 2024 13:22:06 -0600 Subject: [PATCH 06/12] Only check for nans once Signed-off-by: Patrick Avery --- hexrd/matrixutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index bf675fb95..645a2b875 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -686,10 +686,10 @@ def findDuplicateVectors_old(vec, tol=vTol, equivPM=False): def findDuplicateVectors(vec, tol=vTol, equivPM=False): eqv = _findduplicatevectors(vec, tol, equivPM) uid = np.arange(0, vec.shape[1], dtype=np.int64) - idx = eqv[~np.isnan(eqv)].astype(np.int64) + mask = ~np.isnan(eqv) + idx = eqv[mask].astype(np.int64) uid2 = list(np.delete(uid, idx)) eqv2 = [] - mask = ~np.isnan(eqv) for ii in range(eqv.shape[1]): v = eqv[ii, mask[ii, :]] if v.shape[0] > 0: From 71013802fc76fc1b656e931ade56dc28c257373c Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Fri, 5 Jan 2024 11:04:39 -0800 Subject: [PATCH 07/12] fix logic errors. --- hexrd/matrixutil.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index 645a2b875..aeb10d5d4 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -690,7 +690,7 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): idx = eqv[mask].astype(np.int64) uid2 = list(np.delete(uid, idx)) eqv2 = [] - for ii in range(eqv.shape[1]): + for ii in range(eqv.shape[0]): v = eqv[ii, mask[ii, :]] if v.shape[0] > 0: eqv2.append([ii] + list(v.astype(np.int64))) @@ -737,7 +737,7 @@ def _findduplicatevectors(vec, tol, equivPM): """ if equivPM: - vec2 = np.abs(vec.copy()) + vec2 = -vec.copy() n = vec.shape[0] m = vec.shape[1] @@ -753,13 +753,16 @@ def _findduplicatevectors(vec, tol, equivPM): if jj > ii: if equivPM: - diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) + diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) + diff2 = np.sum(np.abs(vec[:, ii]-vec[:, jj])) + if diff < tol or diff2 < tol: + eqv_elem[ctr] = jj + ctr += 1 else: diff = np.sum(np.abs(vec[:, ii]-vec[:, jj])) - - if diff < tol: - eqv_elem[ctr] = jj - ctr += 1 + if diff < tol: + eqv_elem[ctr] = jj + ctr += 1 for kk in range(ctr): eqv[ii, kk] = eqv_elem[kk] From 3f59e9e49b22beaec13bc71c1a2f969df11c598e Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Fri, 5 Jan 2024 16:37:25 -0600 Subject: [PATCH 08/12] Reduce the number of numba threads Too many numba threads are causing allocator contention. Therefore we can limit the number of numba threads to 8. Signed-off-by: Patrick Avery --- hexrd/matrixutil.py | 31 ++++++++++++++++--------------- hexrd/utils/decorators.py | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index aeb10d5d4..73a81a3a3 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -32,7 +32,7 @@ from scipy import sparse -from hexrd.utils.decorators import numba_njit_if_available +from hexrd.utils.decorators import limit_numba_threads, numba_njit_if_available from hexrd import constants from hexrd.constants import USE_NUMBA if USE_NUMBA: @@ -697,6 +697,9 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): return eqv2, uid2 +# We found that too many threads causes allocator contention, +# so limit the number of threads here to just 8. +@limit_numba_threads(8) @numba_njit_if_available(cache=True, nogil=True, parallel=True) def _findduplicatevectors(vec, tol, equivPM): """ @@ -749,20 +752,18 @@ def _findduplicatevectors(vec, tol, equivPM): ctr = 0 eqv_elem = np.zeros((m, ), dtype=np.int64) - for jj in prange(m): - if jj > ii: - - if equivPM: - diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) - diff2 = np.sum(np.abs(vec[:, ii]-vec[:, jj])) - if diff < tol or diff2 < tol: - eqv_elem[ctr] = jj - ctr += 1 - else: - diff = np.sum(np.abs(vec[:, ii]-vec[:, jj])) - if diff < tol: - eqv_elem[ctr] = jj - ctr += 1 + for jj in prange(ii, m): + if equivPM: + diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) + diff2 = np.sum(np.abs(vec[:, ii]-vec[:, jj])) + if diff < tol or diff2 < tol: + eqv_elem[ctr] = jj + ctr += 1 + else: + diff = np.sum(np.abs(vec[:, ii]-vec[:, jj])) + if diff < tol: + eqv_elem[ctr] = jj + ctr += 1 for kk in range(ctr): eqv[ii, kk] = eqv_elem[kk] diff --git a/hexrd/utils/decorators.py b/hexrd/utils/decorators.py index 134b4497f..f6716d4fd 100644 --- a/hexrd/utils/decorators.py +++ b/hexrd/utils/decorators.py @@ -9,6 +9,7 @@ from collections import OrderedDict from functools import wraps +import numba import numpy as np import xxhash @@ -139,3 +140,20 @@ def decorator(func): from numba import prange else: prange = range + + +# A decorator to limit the number of numba threads +def limit_numba_threads(max_threads): + def decorator(func): + def wrapper(*args, **kwargs): + prev_num_threads = numba.get_num_threads() + new_num_threads = min(prev_num_threads, max_threads) + numba.set_num_threads(new_num_threads) + try: + return func(*args, **kwargs) + finally: + numba.set_num_threads(prev_num_threads) + + return wrapper + + return decorator From b15c978ae5f65975e05f012851b07a17c19d8f71 Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Sun, 7 Jan 2024 13:29:31 -0800 Subject: [PATCH 09/12] fix bug --- hexrd/matrixutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index 73a81a3a3..bee7cb8fc 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -752,7 +752,7 @@ def _findduplicatevectors(vec, tol, equivPM): ctr = 0 eqv_elem = np.zeros((m, ), dtype=np.int64) - for jj in prange(ii, m): + for jj in prange(ii+1, m): if equivPM: diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) diff2 = np.sum(np.abs(vec[:, ii]-vec[:, jj])) From a15f20c70c69da26e358c2181a27a8831fc9ad86 Mon Sep 17 00:00:00 2001 From: Saransh Singh Date: Mon, 8 Jan 2024 16:44:12 -0800 Subject: [PATCH 10/12] output matches the old code, but no real speedup anymore. --- hexrd/matrixutil.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index bee7cb8fc..a9da21fdf 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -700,7 +700,7 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): # We found that too many threads causes allocator contention, # so limit the number of threads here to just 8. @limit_numba_threads(8) -@numba_njit_if_available(cache=True, nogil=True, parallel=True) +@numba_njit_if_available(cache=True, nogil=True) def _findduplicatevectors(vec, tol, equivPM): """ Find vectors in an array that are equivalent to within @@ -747,23 +747,26 @@ def _findduplicatevectors(vec, tol, equivPM): eqv = np.zeros((m, m), dtype=np.float64) eqv[:] = np.nan + eqv_elem_master = [] - for ii in prange(m): + for ii in range(m): ctr = 0 eqv_elem = np.zeros((m, ), dtype=np.int64) - - for jj in prange(ii+1, m): - if equivPM: - diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) - diff2 = np.sum(np.abs(vec[:, ii]-vec[:, jj])) - if diff < tol or diff2 < tol: - eqv_elem[ctr] = jj - ctr += 1 - else: - diff = np.sum(np.abs(vec[:, ii]-vec[:, jj])) - if diff < tol: - eqv_elem[ctr] = jj - ctr += 1 + for jj in range(ii+1, m): + if not jj in eqv_elem_master: + if equivPM: + diff = np.sum(np.abs(vec[:, ii]-vec2[:, jj])) + diff2 = np.sum(np.abs(vec[:, ii]-vec[:, jj])) + if diff < tol or diff2 < tol: + eqv_elem[ctr] = jj + eqv_elem_master.append(jj) + ctr += 1 + else: + diff = np.sum(np.abs(vec[:, ii]-vec[:, jj])) + if diff < tol: + eqv_elem[ctr] = jj + eqv_elem_master.append(jj) + ctr += 1 for kk in range(ctr): eqv[ii, kk] = eqv_elem[kk] From b764faed38cf4c2e48e6c5f3e65be0481b56af74 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 8 Jan 2024 19:02:47 -0600 Subject: [PATCH 11/12] Remove limit_numba_threads() It's not in parallel anymore, so we don't need it. Signed-off-by: Patrick Avery --- hexrd/matrixutil.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hexrd/matrixutil.py b/hexrd/matrixutil.py index a9da21fdf..12569a21b 100644 --- a/hexrd/matrixutil.py +++ b/hexrd/matrixutil.py @@ -32,7 +32,7 @@ from scipy import sparse -from hexrd.utils.decorators import limit_numba_threads, numba_njit_if_available +from hexrd.utils.decorators import numba_njit_if_available from hexrd import constants from hexrd.constants import USE_NUMBA if USE_NUMBA: @@ -697,9 +697,6 @@ def findDuplicateVectors(vec, tol=vTol, equivPM=False): return eqv2, uid2 -# We found that too many threads causes allocator contention, -# so limit the number of threads here to just 8. -@limit_numba_threads(8) @numba_njit_if_available(cache=True, nogil=True) def _findduplicatevectors(vec, tol, equivPM): """ From bea9b1c82d3d3e880284a1ac03c6825489cae213 Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Mon, 8 Jan 2024 19:34:44 -0600 Subject: [PATCH 12/12] Lazily compute structure factor Instead of computing the structure factor every time a property of the material changes, flag the structure factor as invalid, and only re-compute it if it is requested. This significantly speeds up interaction with the lattice parameter, such as with the PT sliders. Signed-off-by: Patrick Avery --- hexrd/material/crystallography.py | 24 ++++++++++++++++++++++++ hexrd/material/material.py | 14 ++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/hexrd/material/crystallography.py b/hexrd/material/crystallography.py index ee186b364..d3d5c63c4 100644 --- a/hexrd/material/crystallography.py +++ b/hexrd/material/crystallography.py @@ -700,6 +700,9 @@ def __init__(self, raise RuntimeError('have unparsed keyword arguments with keys: ' + str(list(kwargs.keys()))) + # This is only used to calculate the structure factor if invalidated + self.__unitcell = None + self.__calc() return @@ -935,7 +938,27 @@ def set_wavelength(self, wavelength): wavelength = property(get_wavelength, set_wavelength, None) + def invalidate_structure_factor(self, unitcell): + # It can be expensive to compute the structure factor, so provide the + # option to just invalidate it, while providing a unit cell, so that + # it can be lazily computed from the unit cell. + self.__structFact = None + self._powder_intensity = None + self.__unitcell = unitcell + + def _compute_sf_if_needed(self): + any_invalid = ( + self.__structFact is None or + self._powder_intensity is None + ) + if any_invalid and self.__unitcell is not None: + # Compute the structure factor first. + # This can be expensive to do, so we lazily compute it when needed. + hkls = self.getHKLs(allHKLs=True) + self.set_structFact(self.__unitcell.CalcXRSF(hkls)) + def get_structFact(self): + self._compute_sf_if_needed() return self.__structFact[~self.exclusions] def set_structFact(self, structFact): @@ -953,6 +976,7 @@ def set_structFact(self, structFact): @property def powder_intensity(self): + self._compute_sf_if_needed() return self._powder_intensity[~self.exclusions] @staticmethod diff --git a/hexrd/material/material.py b/hexrd/material/material.py index abb139f08..ffd775248 100644 --- a/hexrd/material/material.py +++ b/hexrd/material/material.py @@ -226,7 +226,7 @@ def __init__( self.reset_v0() self._newPdata() - self.update_structure_factor() + self.invalidate_structure_factor() def __str__(self): """String representation""" @@ -291,7 +291,7 @@ def _newUnitcell(self): def _hkls_changed(self): # Call this when something happens that changes the hkls... self._newPdata() - self.update_structure_factor() + self.invalidate_structure_factor() def _newPdata(self): """Create a new plane data instance if the hkls have changed""" @@ -405,10 +405,8 @@ def enable_hkls_below_tth(self, tth_threshold=90.0): self._pData.exclusions = dflt_excl - def update_structure_factor(self): - hkls = self.planeData.getHKLs(allHKLs=True) - sf = self.unitcell.CalcXRSF(hkls) - self.planeData.set_structFact(sf) + def invalidate_structure_factor(self): + self.planeData.invalidate_structure_factor(self.unitcell) def compute_powder_overlay( self, ttharray=np.linspace(0, 80, 2000), fwhm=0.25, scale=1.0 @@ -1268,7 +1266,7 @@ def charge(self, vals): self._charge = vals # self._newUnitcell() - # self.update_structure_factor() + # self.invalidate_structure_factor() @property def absorption_length(self): @@ -1390,7 +1388,7 @@ def _set_atomdata(self, atomtype, atominfo, U, charge): self.charge = charge self._newUnitcell() - self.update_structure_factor() + self.invalidate_structure_factor() # # ========== Methods