Skip to content

Commit

Permalink
Merge pull request #564 from Rosalbam1/dev
Browse files Browse the repository at this point in the history
dgeom and cleanup edits- adding doc strings and types
  • Loading branch information
avcopan authored Aug 30, 2024
2 parents 434b9fb + 3e50572 commit 36f884a
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 65 deletions.
147 changes: 93 additions & 54 deletions automol/embed/_cleanup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""implements geometry purification for the distance geometry algorithm.
"""Implements geometry purification for the distance geometry algorithm.
This is used to clean up the structure of the molecule and enforce correct
chirality, by minimizing an error function.
Expand Down Expand Up @@ -50,7 +50,7 @@
X = numpy.newaxis


def volume(xmat: NDArrayLike2D, idxs: list):
def volume(xmat: NDArrayLike2D, idxs: list) -> float:
"""Calculate signed tetrahedral volume for a tetrad of atoms.
for a tetrad of four atoms (1, 2, 3, 4) around a central atom, the signed
Expand All @@ -59,6 +59,10 @@ def volume(xmat: NDArrayLike2D, idxs: list):
d12 . (d13 x d14)
where dij = rj - ri, . is the dot product, and x is the cross product
:param xmat: The matrix of XYZ coordinates
:param idxs: A tetrad of four atom indices defining the volume
:return: The volume
"""
xmat = numpy.array(xmat)
idxs = list(idxs)
Expand All @@ -71,8 +75,12 @@ def volume(xmat: NDArrayLike2D, idxs: list):
return vol


def volume_gradient(xmat: NDArrayLike2D, idxs: list):
"""Calculate the tetrahedral volume gradient for a tetrad of atoms."""
def volume_gradient(xmat: NDArrayLike2D, idxs: list) -> numpy.ndarray:
"""Calculate the tetrahedral volume gradient for a tetrad of atoms.
:param xmat: The matrix of XYZ coordinates
:param idxs: A tetrad of four atom indices defining the volume
:return: The volume gradient.
"""
xmat = numpy.array(xmat)
idxs = list(idxs)
xyzs = xmat[:, :3][idxs]
Expand All @@ -94,28 +102,28 @@ def error_function_(
umat: NDArrayLike2D,
chi_dct: SignedVolumeContraints = None,
pla_dct: SignedVolumeContraints = None,
wdist=1.0,
wchip=1.0,
wdim4=1.0,
leps=0.1,
ueps=0.1,
log=False,
wdist: float = 1.0,
wchip: float = 1.0,
wdim4: float = 1.0,
leps: float = 0.1,
ueps: float = 0.1,
log: bool = False,
) -> Callable[[NDArrayLike2D], float]:
"""Compute the embedding error function.
:param lmat: lower-bound distance matrix
:param umat: upper-bound distance matrix
:param chi_dct: chirality constraints; the keys are tuples of four atoms,
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix
:param chi_dct: Chirality constraints; the keys are tuples of four atoms,
the values are lower and upper bounds on the four-point signed volume
of these atoms
:param pla_dct: planarity constraints; the keys are tuples of four atoms,
:param pla_dct: Planarity constraints; the keys are tuples of four atoms,
the values are lower and upper bounds on the four-point signed volume
of these atoms
:param wdist: weight on the distance constraint
:param wchip: weight on the chirality/planarity constraint
:param wdim4: weight on the fourth dimension constraint
:param leps: denominator epsilon for lower bound distances
:param ueps: denominator epsilon for upper bound distances
:param wdist: Weight on the distance constraint
:param wchip: Weight on the chirality/planarity constraint
:param wdim4: Weight on the fourth dimension constraint
:param leps: Denominator epsilon for lower bound distances
:param ueps: Denominator epsilon for upper bound distances
"""
triu = numpy.triu_indices_from(lmat)
chi_dct = {} if chi_dct is None else chi_dct
Expand Down Expand Up @@ -183,27 +191,28 @@ def error_function_gradient_(
umat: NDArrayLike2D,
chi_dct: SignedVolumeContraints = None,
pla_dct: SignedVolumeContraints = None,
wdist=1.0,
wchip=1.0,
wdim4=1.0,
leps=0.1,
ueps=0.1,
wdist: float = 1.0,
wchip: float = 1.0,
wdim4: float = 1.0,
leps: float = 0.1,
ueps: float = 0.1,
) -> Callable[[NDArrayLike2D], numpy.ndarray]:
"""Check the embedding error function gradient.
:param lmat: lower-bound distance matrix
:param umat: upper-bound distance matrix
:param chi_dct: chirality constraints; the keys are tuples of four atoms,
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix
:param chi_dct: Chirality constraints; the keys are tuples of four atoms,
the values are lower and upper bounds on the four-point signed volume
of these atoms
:param pla_dct: planarity constraints; the keys are tuples of four atoms,
:param pla_dct: Planarity constraints; the keys are tuples of four atoms,
the values are lower and upper bounds on the four-point signed volume
of these atoms
:param wdist: weight on the distance constraint
:param wchip: weight on the chirality/planarity constraint
:param wdim4: weight on the fourth dimension constraint
:param leps: denominator epsilon for lower bound distances
:param ueps: denominator epsilon for upper bound distances
:param wdist: Weight on the distance constraint
:param wchip: Weight on the chirality/planarity constraint
:param wdim4: Weight on the fourth dimension constraint
:param leps: Denominator epsilon for lower bound distances
:param ueps: Denominator epsilon for upper bound distances
:return: Error function gradient
"""
chi_dct = {} if chi_dct is None else chi_dct
pla_dct = {} if pla_dct is None else pla_dct
Expand Down Expand Up @@ -261,15 +270,29 @@ def error_function_numerical_gradient_(
umat: NDArrayLike2D,
chi_dct: SignedVolumeContraints = None,
pla_dct: SignedVolumeContraints = None,
wdist=1.0,
wchip=1.0,
wdim4=1.0,
leps=0.1,
ueps=0.1,
wdist: float = 1.0,
wchip: float = 1.0,
wdim4: float = 1.0,
leps: float = 0.1,
ueps: float = 0.1,
) -> Callable[[NDArrayLike2D], numpy.ndarray]:
"""Check the gradient of the distance error function.
(For testing purposes only; Used to check the analytic gradient formula.)
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix
:param chi_dct: Chirality constraints; the keys are tuples of four atoms,
the values are lower and upper bounds on the four-point signed volume
of these atoms
:param pla_dct: Planarity constraints; the keys are tuples of four atoms,
the values are lower and upper bounds on the four-point signed volume
of these atoms
:param wdist: Weight on the distance constraint
:param wchip: Weight on the chirality/planarity constraint
:param wdim4: Weight on the fourth dimension constraint
:param leps: Denominator epsilon for lower bound distances
:param ueps: Denominator epsilon for upper bound distances
:return:Gradient of the distance error function
"""
erf_ = error_function_(
lmat,
Expand All @@ -290,12 +313,21 @@ def _gradient(xmat):
return _gradient


def polak_ribiere_beta(sd1, sd0):
"""Calculate the Polak-Ribiere Beta coefficient."""
def polak_ribiere_beta(sd1: numpy.ndarray, sd0: numpy.ndarray) -> float:
"""Determine the conjugate gradient alpha coefficient
from an error-minimizing line search.
(See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method)
:param sd1: The steepest-descent (negative gradient) direction of the current step
:return: The alpha coefficient
"""
return numpy.vdot(sd1, sd1 - sd0) / numpy.vdot(sd0, sd0)


def line_search_alpha(err_, sd1, cd1):
def line_search_alpha(
err_: Callable[[NDArrayLike2D], float], sd1: numpy.ndarray, cd1: numpy.ndarray
):
"""Perform a line search to determine the alpha coefficient."""

# define the objective function
Expand All @@ -322,12 +354,12 @@ def cleaned_up_coordinates(
chi_dct: SignedVolumeContraints = None,
pla_dct: SignedVolumeContraints = None,
conv_=None,
max_dist_err=0.2,
grad_thresh=0.2,
maxiter=None,
chi_flip=True,
dim4=True,
log=False,
max_dist_err: float = 0.2,
grad_thresh: float = 0.2,
maxiter: int | None = None,
chi_flip: bool = True,
dim4: bool = True,
log: bool = False,
):
"""Clean up coordinates by conjugate-gradients error minimization.
Expand Down Expand Up @@ -402,8 +434,8 @@ def _is_converged(xmat, err, grad):


def distance_convergence_checker_(
lmat: NDArrayLike2D, umat: NDArrayLike2D, max_dist_err=0.2
):
lmat: NDArrayLike2D, umat: NDArrayLike2D, max_dist_err: float = 0.2
) -> numpy.ndarray:
"""Convergence checker based on the maximum distance error."""

def _is_converged(xmat, err, grad):
Expand All @@ -418,7 +450,7 @@ def _is_converged(xmat, err, grad):


def planarity_convergence_checker_(
pla_dct, max_vol_err=0.2
pla_dct: dict, max_vol_err: float = 0.2
) -> Callable[[NDArrayLike2D, float, NDArrayLike2D], bool]:
"""Convergence checker based on the maximum planarity error."""

Expand All @@ -435,7 +467,7 @@ def _is_converged(xmat, err, grad):


def gradient_convergence_checker_(
thresh=1e-1,
thresh: float = 1e-1,
) -> Callable[[NDArrayLike2D, float, NDArrayLike2D], bool]:
"""Maximum gradient convergence checker."""

Expand All @@ -450,14 +482,21 @@ def _is_converged(xmat, err, grad):
return _is_converged


def minimize_error(xmat: NDArrayLike2D, err_, grad_, conv_, maxiter=None):
def minimize_error(
xmat: NDArrayLike2D,
err_: Callable[[NDArrayLike2D], float],
grad_,
conv_,
maxiter: int | None = None,
) -> bool:
"""Do conjugate-gradients error minimization.
:param err_: a callable error function of xmat
:param grad_: a callable error gradient function of xmat
:param conv_: a callable convergence checker function of xmat, err_(xmat),
and grad_(xmat) which returns True if the geometry is converged
:param maxiter: maximum number of iterations; default is three times the
number of coordinates
:returns: the optimized coordinates and a boolean which is True if
converged and False if not
"""
Expand All @@ -479,7 +518,7 @@ def minimize_error(xmat: NDArrayLike2D, err_, grad_, conv_, maxiter=None):
if sd0 is None:
cd1 = sd1
else:
# 2. Cumpute beta
# 2. Compute beta
beta = min(0.0, polak_ribiere_beta(sd1, sd0))

# 3. determine step direction
Expand Down
41 changes: 30 additions & 11 deletions automol/embed/_dgeom.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,14 @@
MatrixLike = numpy.ndarray | Sequence2D


def sample_raw_distance_coordinates(lmat: MatrixLike, umat: MatrixLike, dim4=True):
"""Sample raw (uncorrected) distance coordinates."""
def sample_raw_distance_coordinates(
lmat: MatrixLike, umat: MatrixLike, dim4=True
) -> MatrixLike:
"""Sample raw (uncorrected) distance coordinates.
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix.
:return: Matrix with raw distance coordinates.
"""
# 2. Triangle-smooth the bounds matrices
lmat, umat = triangle_smooth_bounds_matrices(lmat, umat)

Expand All @@ -59,13 +65,16 @@ def sample_raw_distance_coordinates(lmat: MatrixLike, umat: MatrixLike, dim4=Tru
return xmat


def triangle_smooth_bounds_matrices(lmat: MatrixLike, umat: MatrixLike):
def triangle_smooth_bounds_matrices(lmat: MatrixLike, umat: MatrixLike) -> MatrixLike:
"""Smoothing of the bounds matrix by triangle inequality.
Dress, A. W. M.; Havel, T. F. "Shortest-Path Problems and Molecular
Conformation"; Discrete Applied Mathematics (1988) 19 p. 129-144.
This algorithm is directly from p. 8 in the paper.
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix
:return: Smooth matrix
"""
lmat, umat = map(numpy.array, (lmat, umat))

Expand Down Expand Up @@ -93,10 +102,13 @@ def triangle_smooth_bounds_matrices(lmat: MatrixLike, umat: MatrixLike):
return lmat, umat


def sample_distance_matrix(lmat: MatrixLike, umat: MatrixLike):
def sample_distance_matrix(lmat: MatrixLike, umat: MatrixLike) -> MatrixLike:
"""Determine a random distance matrix based on the bounds matrices.
That is, a random guess at d_ij = |r_i - r_j|
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix
:return: Sample matrix
"""
lmat, umat = map(numpy.array, (lmat, umat))

Expand All @@ -108,7 +120,7 @@ def sample_distance_matrix(lmat: MatrixLike, umat: MatrixLike):
return dmat


def distances_from_center(dmat: MatrixLike):
def distances_from_center(dmat: MatrixLike) -> MatrixLike:
"""Get the vector of distances from the center (average position).
The "center" in this case is the average of the position vectors. The
Expand All @@ -122,6 +134,8 @@ def distances_from_center(dmat: MatrixLike):
I verified this against the alternative formula:
dc_i^2 = 1/(2n^2) sum_j sum_k (d_ij^2 + d_ik^2 - d_jk^2)
from page 284 of the paper.
:dmat: Distance Matrix
:return: Distances from center
"""
dmat = numpy.array(dmat)

Expand All @@ -142,7 +156,7 @@ def distances_from_center(dmat: MatrixLike):
return dcvec


def metric_matrix(dmat: MatrixLike):
def metric_matrix(dmat: MatrixLike) -> MatrixLike:
"""Compute the matrix of position vector dot products, with a central origin.
"Central" in this case mean the average of the position vectors. So these
Expand All @@ -168,7 +182,7 @@ def metric_matrix(dmat: MatrixLike):
return gmat


def coordinates_from_metric_matrix(gmat: MatrixLike, dim4=False):
def coordinates_from_metric_matrix(gmat: MatrixLike, dim4: bool = False) -> MatrixLike:
"""Determine molecule coordinates from the metric matrix."""
gmat = numpy.array(gmat)

Expand All @@ -186,7 +200,7 @@ def coordinates_from_metric_matrix(gmat: MatrixLike, dim4=False):
return xmat


def metric_matrix_from_coordinates(xmat: MatrixLike):
def metric_matrix_from_coordinates(xmat: MatrixLike) -> MatrixLike:
"""Determine the metric matrix from coordinates.
(for testing purposes only!)
Expand All @@ -195,7 +209,7 @@ def metric_matrix_from_coordinates(xmat: MatrixLike):
return xmat @ xmat.T


def distance_matrix_from_coordinates(xmat: MatrixLike, dim4=True):
def distance_matrix_from_coordinates(xmat: MatrixLike, dim4: bool = True) -> MatrixLike:
"""Determine the distance matrix from coordinates.
(for testing purposes only!)
Expand All @@ -215,9 +229,14 @@ def distance_matrix_from_coordinates(xmat: MatrixLike, dim4=True):


def greatest_distance_errors(
dmat: MatrixLike, lmat: MatrixLike, umat: MatrixLike, count=10
dmat: MatrixLike, lmat: MatrixLike, umat: MatrixLike, count: int = 10
):
"""Get the indices of the maximum distance errors."""
"""Get the indices of the maximum distance errors.
:dmat: Distance matrix
:param lmat: Lower-bound distance matrix
:param umat: Upper-bound distance matrix
return: Dictionary of greatest distance errores.
"""
lerrs = (lmat - dmat) * (lmat >= dmat)
uerrs = (dmat - umat) * (dmat >= umat)
errs = numpy.maximum(lerrs, uerrs)
Expand Down

0 comments on commit 36f884a

Please sign in to comment.