Tanh Khan boundaries #7

Merged
merged 5 commits on Feb 4, 2025
2 changes: 1 addition & 1 deletion pyoptgra/__init__.py
@@ -12,7 +12,7 @@
# file, you can obtain them at https://www.gnu.org/licenses/gpl-3.0.txt
# and https://essr.esa.int/license/european-space-agency-community-license-v2-4-weak-copyleft

from .optgra import khan_function, optgra # noqa
from .optgra import khan_function_sin, khan_function_tanh, optgra # noqa

try:
from ._version import version as __version__ # noqa
249 changes: 186 additions & 63 deletions pyoptgra/optgra.py
@@ -27,19 +27,23 @@
)


class khan_function:
r"""Function to smothly enforce optimisation parameter bounds as Michal Khan used to do:
class base_khan_function:
r"""Base class for a function to smothly enforce optimisation parameter bounds as Michal Khan
used to do:

.. math::

x = \frac{x_{max} + x_{min}}{2} + \frac{x_{max} - x_{min}}{2} \cdot \sin(x_{khan})
x = \frac{x_{max} + x_{min}}{2} + \frac{x_{max} - x_{min}}{2} \cdot f(x_{khan})

Where :math:`x` is the pagmo decision vector and :math:`x_{khan}` is the decision vector
passed to OPTGRA. In this way parameter bounds are guaranteed to be satisfied, but the gradients
near the bounds approach zero.

The child class needs to implement the methods `_eval`, `_eval_inv`, `_eval_grad` and
`_eval_inv_grad`.
""" # noqa: W605

def __init__(self, lb: List[float], ub: List[float], unity_gradient: bool = True):
def __init__(self, lb: List[float], ub: List[float]):
"""Constructor

Parameters
@@ -48,11 +52,6 @@ def __init__(self, lb: List[float], ub: List[float], unity_gradient: bool = True
Lower pagmo parameter bounds
ub : List[float]
Upper pagmo parameter bounds
unity_gradient : bool, optional
Uses an internal scaling that ensures that the derivative of pagmo parameters w.r.t.
khan parameters are unity at (lb + ub)/2. By default True.
Otherwise, the original Khan method is used that can result in strongly modified
gradients
"""
self._lb = np.asarray(lb)
self._ub = np.asarray(ub)
@@ -86,54 +85,6 @@ def _isfinite(a: np.ndarray):
self._lb_masked = self._lb[self.mask]
self._ub_masked = self._ub[self.mask]

# determine coefficients inside the sin function
self._a = 2 / (self._ub_masked - self._lb_masked) if unity_gradient else 1.0
self._b = (
-(self._ub_masked + self._lb_masked) / (self._ub_masked - self._lb_masked)
if unity_gradient
else 0.0
)

def _eval(self, x_khan_masked: np.ndarray) -> np.ndarray:
return (self._ub_masked + self._lb_masked) / 2 + (
self._ub_masked - self._lb_masked
) / 2 * np.sin(x_khan_masked * self._a + self._b)

def _eval_inv(self, x_masked: np.ndarray) -> np.ndarray:
arg = (2 * x_masked - self._ub_masked - self._lb_masked) / (
self._ub_masked - self._lb_masked
)

clip_value = 1.0 - 1e-8 # avoid boundaries
if np.any((arg < -clip_value) | (arg > clip_value)):
print(
"WARNING: Numerical inaccuracies encountered during khan_function inverse.",
"Clipping parameters to valid range.",
)
arg = np.clip(arg, -clip_value, clip_value)
return (np.arcsin(arg) - self._b) / self._a

def _eval_grad(self, x_khan_masked: np.ndarray) -> np.ndarray:
return (
(self._ub_masked - self._lb_masked)
/ 2
* np.cos(self._a * x_khan_masked + self._b)
* self._a
)

def _eval_inv_grad(self, x_masked: np.ndarray) -> np.ndarray:
return (
-1
/ self._a
/ (
(self._lb_masked - self._ub_masked)
* np.sqrt(
((self._lb_masked - x_masked) * (x_masked - self._ub_masked))
/ (self._ub_masked - self._lb_masked) ** 2
)
)
)

def _apply_to_subset(
self, x: np.ndarray, func: Callable, default_result: Optional[np.ndarray] = None
) -> np.ndarray:
@@ -144,6 +95,18 @@ def _apply_to_subset(
result[self.mask] = func(x[self.mask])
return result

def _eval(self, x_khan_masked: np.ndarray) -> np.ndarray:
raise NotImplementedError

def _eval_inv(self, x_masked: np.ndarray) -> np.ndarray:
raise NotImplementedError

def _eval_grad(self, x_khan_masked: np.ndarray) -> np.ndarray:
raise NotImplementedError

def _eval_inv_grad(self, x_masked: np.ndarray) -> np.ndarray:
raise NotImplementedError

def eval(self, x_khan: np.ndarray) -> np.ndarray:
"""Convert :math:`x_{optgra}` to :math:`x`.

@@ -208,6 +171,153 @@ def eval_inv_grad(self, x: np.ndarray) -> np.ndarray:
return np.diag(self._apply_to_subset(np.asarray(x), self._eval_inv_grad, np.ones(self._nx)))
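
To make the new extension point concrete, here is a hedged sketch of a third, hypothetical subclass based on arctan. It is not part of this PR; it only illustrates the four hooks (`_eval`, `_eval_inv`, `_eval_grad`, `_eval_inv_grad`) that a child of `base_khan_function` has to provide, and it omits the clipping safeguards the shipped variants use near the bounds.

```python
import numpy as np
from pyoptgra.optgra import base_khan_function


class khan_function_arctan(base_khan_function):
    """Hypothetical arctan-based bound mapping, for illustration only:
    x = (ub + lb)/2 + (ub - lb)/pi * arctan(x_khan)."""

    def __init__(self, lb, ub):
        super().__init__(lb, ub)
        # shorthand for the masked (finite-bound) components
        self._sum = self._ub_masked + self._lb_masked
        self._diff = self._ub_masked - self._lb_masked

    def _eval(self, x_khan_masked):
        # maps any finite Khan value into the open interval (lb, ub)
        return self._sum / 2 + self._diff / np.pi * np.arctan(x_khan_masked)

    def _eval_inv(self, x_masked):
        # exact inverse of _eval; no clipping near the bounds in this sketch
        return np.tan(np.pi / 2 * (2 * x_masked - self._sum) / self._diff)

    def _eval_grad(self, x_khan_masked):
        # d x / d x_khan, approaches zero as |x_khan| grows
        return self._diff / (np.pi * (1 + x_khan_masked**2))

    def _eval_inv_grad(self, x_masked):
        # d x_khan / d x, the reciprocal of _eval_grad at the same point
        u = np.pi / 2 * (2 * x_masked - self._sum) / self._diff
        return np.pi / self._diff * (1 + np.tan(u) ** 2)
```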


class khan_function_sin(base_khan_function):
r"""Function to smothly enforce optimisation parameter bounds as Michal Khan used to do:

.. math::

x = \frac{x_{max} + x_{min}}{2} + \frac{x_{max} - x_{min}}{2} \cdot \sin(x_{khan})

Where :math:`x` is the pagmo decision vector and :math:`x_{khan}` is the decision vector
passed to OPTGRA. In this way parameter bounds are guaranteed to be satisfied, but the gradients
near the bounds approach zero.
""" # noqa: W605

def __init__(self, lb: List[float], ub: List[float], unity_gradient: bool = True):
"""Constructor

Parameters
----------
lb : List[float]
Lower pagmo parameter bounds
ub : List[float]
Upper pagmo parameter bounds
unity_gradient : bool, optional
Uses an internal scaling that ensures that the derivative of pagmo parameters w.r.t.
Khan parameters are unity at (lb + ub)/2. By default True.
Otherwise, the original Khan method is used, which can result in strongly modified
gradients.
"""
# call parent class constructor
super().__init__(lb, ub)

# determine coefficients inside the sin function
self._a = 2 / (self._ub_masked - self._lb_masked) if unity_gradient else 1.0
self._b = (
-(self._ub_masked + self._lb_masked) / (self._ub_masked - self._lb_masked)
if unity_gradient
else 0.0
)

def _eval(self, x_khan_masked: np.ndarray) -> np.ndarray:
return (self._ub_masked + self._lb_masked) / 2 + (
self._ub_masked - self._lb_masked
) / 2 * np.sin(x_khan_masked * self._a + self._b)

def _eval_inv(self, x_masked: np.ndarray) -> np.ndarray:
arg = (2 * x_masked - self._ub_masked - self._lb_masked) / (
self._ub_masked - self._lb_masked
)

clip_value = 1.0 - 1e-8 # avoid boundaries
if np.any((arg < -clip_value) | (arg > clip_value)):
print(
"WARNING: Numerical inaccuracies encountered during khan_function inverse.",
"Clipping parameters to valid range.",
)
arg = np.clip(arg, -clip_value, clip_value)
return (np.arcsin(arg) - self._b) / self._a

def _eval_grad(self, x_khan_masked: np.ndarray) -> np.ndarray:
return (
(self._ub_masked - self._lb_masked)
/ 2
* np.cos(self._a * x_khan_masked + self._b)
* self._a
)

def _eval_inv_grad(self, x_masked: np.ndarray) -> np.ndarray:
return (
-1
/ self._a
/ (
(self._lb_masked - self._ub_masked)
* np.sqrt(
((self._lb_masked - x_masked) * (x_masked - self._ub_masked))
/ (self._ub_masked - self._lb_masked) ** 2
)
)
)
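
As a quick sanity check of the `unity_gradient` scaling described in the docstring above, a minimal sketch could look like the following. It assumes the public `eval`, `eval_inv` and `eval_grad` wrappers of `base_khan_function`, with `eval_grad` returning a diagonal Jacobian as `eval_inv_grad` does in the diff.

```python
import numpy as np
from pyoptgra import khan_function_sin

lb, ub = [0.0, -1.0], [10.0, 1.0]
kf = khan_function_sin(lb, ub, unity_gradient=True)

# Khan coordinates of the centre of the box bounds
mid = (np.asarray(lb) + np.asarray(ub)) / 2
x_khan = kf.eval_inv(mid)

print(kf.eval(x_khan))                # recovers the midpoint [5. 0.]
print(np.diag(kf.eval_grad(x_khan)))  # [1. 1.]: unit slope dx/dx_khan at the centre
```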


class khan_function_tanh(base_khan_function):
r"""Function to smothly enforce optimisation parameter bounds using the hyperbolic tangent:

.. math::

x = \frac{x_{max} + x_{min}}{2} + \frac{x_{max} - x_{min}}{2} \cdot \tanh(x_{khan})

Where :math:`x` is the pagmo decision vector and :math:`x_{khan}` is the decision vector
passed to OPTGRA. In this way parameter bounds are guaranteed to be satisfied, but the gradients
near the bounds approach zero.
""" # noqa: W605

def __init__(self, lb: List[float], ub: List[float], unity_gradient: bool = True):
"""Constructor

Parameters
----------
lb : List[float]
Lower pagmo parameter bounds
ub : List[float]
Upper pagmo parameter bounds
unity_gradient : bool, optional
Uses an internal scaling that ensures that the derivative of pagmo parameters w.r.t.
khan parameters are unity at (lb + ub)/2. By default True.
Otherwise, the original Khan method is used, which can result in strongly modified
gradients.
"""
# call parent class constructor
super().__init__(lb, ub)

# define amplification factor so that the bounds are not reached only at +/- infinity
amp = 1.0 + 1e-3

# define the clip value (we avoid the boundaries of the parameters by this much)
self.clip_value = 1.0 - 1e-6

# determine coefficients inside the tanh function
self._diff_masked = amp * (self._ub_masked - self._lb_masked)
self._sum_masked = self._ub_masked + self._lb_masked
self._a = 2 / self._diff_masked if unity_gradient else 1.0
self._b = -self._sum_masked / self._diff_masked if unity_gradient else 0.0

def _eval(self, x_khan_masked: np.ndarray) -> np.ndarray:
return self._sum_masked / 2 + self._diff_masked / 2 * np.tanh(
x_khan_masked * self._a + self._b
)

def _eval_inv(self, x_masked: np.ndarray) -> np.ndarray:
arg = (2 * x_masked - self._sum_masked) / (self._diff_masked)

if np.any((arg < -self.clip_value) | (arg > self.clip_value)):
print(
"WARNING: Numerical inaccuracies encountered during khan_function inverse.",
"Clipping parameters to valid range.",
)
arg = np.clip(arg, -self.clip_value, self.clip_value)
return (np.arctanh(arg) - self._b) / self._a

def _eval_grad(self, x_khan_masked: np.ndarray) -> np.ndarray:
return self._diff_masked / 2 / np.cosh(self._a * x_khan_masked + self._b) ** 2 * self._a

def _eval_inv_grad(self, x_masked: np.ndarray) -> np.ndarray:
return (2 * self._diff_masked) / (
self._a * (self._diff_masked**2 - (2 * x_masked - self._sum_masked) ** 2)
)
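
A similar hedged round-trip sketch for the tanh variant (same assumptions about the public wrappers as above): the mapping is exactly invertible inside the box, and the gradient shrinks towards the bounds while being one at the centre.

```python
import numpy as np
from pyoptgra import khan_function_tanh

lb, ub = [0.0, -5.0], [1.0, 5.0]
kf = khan_function_tanh(lb, ub)

# round trip pagmo -> Khan -> pagmo for a point inside the box
x = np.array([0.25, 2.0])
np.testing.assert_allclose(kf.eval(kf.eval_inv(x)), x, rtol=1e-8)

# gradients dx/dx_khan: close to zero near the bounds, exactly one at the centre
near_bound = kf.eval_inv(np.array([0.999, 4.99]))
centre = kf.eval_inv(np.array([0.5, 0.0]))
print(np.diag(kf.eval_grad(near_bound)))  # small values
print(np.diag(kf.eval_grad(centre)))      # [1. 1.]
```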


class optgra:
"""
This class is a user defined algorithm (UDA) providing a wrapper around OPTGRA, which is written
@@ -247,7 +357,7 @@ def _wrap_fitness_func(
problem,
bounds_to_constraints: bool = True,
force_bounds: bool = False,
khanf: Optional[khan_function] = None,
khanf: Optional[base_khan_function] = None,
):
# get problem parameters
lb, ub = problem.get_bounds()
@@ -289,7 +399,7 @@ def _wrap_gradient_func(
problem,
bounds_to_constraints: bool = True,
force_bounds=False,
khanf: Optional[khan_function] = None,
khanf: Optional[base_khan_function] = None,
):

# get the sparsity pattern to index the sparse gradients
@@ -369,7 +479,7 @@ def __init__(
merit_function_threshold: float = 1e-6,
# bound_constraints_scalar: float = 1,
force_bounds: bool = False,
khan_bounds: bool = False,
khan_bounds: Union[str, bool] = False,
optimization_method: int = 2,
log_level: int = 0,
) -> None:
@@ -416,18 +526,21 @@ def __init__(
If active, the gradients evaluated near the bounds will be inaccurate, potentially
leading to convergence issues.
khan_bounds: optional - whether to gracefully enforce bounds on the decision vector
using Michael Khan's method:
using Michael Khan's method (by default False):

.. math::

x = \frac{x_{max} + x_{min}}{2} + \frac{x_{max} - x_{min}}{2} \cdot \sin(x_{Khan})

Where :math:`x` is the pagmo decision vector and :math:`x_{Khan}` is the decision
vector passed to OPTGRA. In this way parameter bounds are guaranteed to be
satisfied, but the gradients near the bounds approach zero. By default False.
satisfied, but the gradients near the bounds approach zero.
Pyoptgra uses a variant of the above method that additionally scales the
argument of the :math:`\sin` function such that the derivatives
:math:`\frac{d x_{Khan}}{d x}` are unity in the center of the box bounds.
Alternatively to the :math:`\sin` function, a :math:`\tanh` can also be
used as the Khan function.
Valid input values are: True (same as 'sin'), 'sin', 'tanh' and False.
optimization_method: select 0 for steepest descent, 1 for modified spectral conjugate
gradient method, 2 for spectral conjugate gradient method and 3 for conjugate
gradient method
@@ -609,7 +722,17 @@ def evolve(self, population):
idx = list(population.get_ID()).index(selected[0][0])

# optional Khan function to enforce parameter bounds
khanf = khan_function(*problem.get_bounds()) if self.khan_bounds else None
if self.khan_bounds in ("sin", True):
khanf = khan_function_sin(*problem.get_bounds())
elif self.khan_bounds == "tanh":
khanf = khan_function_tanh(*problem.get_bounds())
elif self.khan_bounds:
raise ValueError(
f"Unrecognised option, {self.khan_bounds}, passed for 'khan_bounds'. "
"Supported options are 'sin', 'tanh' or None."
)
else:
khanf = None

fitness_func = optgra._wrap_fitness_func(
problem, self.bounds_to_constraints, self.force_bounds, khanf
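
To show how the new `khan_bounds` dispatch above is exercised from user code, here is a minimal usage sketch. The pygmo Rosenbrock test problem and its settings are illustrative assumptions, not taken from this PR.

```python
import pygmo as pg
from pyoptgra import optgra

# any box-bounded problem with gradients will do; Rosenbrock is just an example
prob = pg.problem(pg.rosenbrock(dim=4))
pop = pg.population(prob, size=1, seed=42)

# "tanh" selects khan_function_tanh; True or "sin" select the sine variant
algo = pg.algorithm(optgra(khan_bounds="tanh"))
pop = algo.evolve(pop)
print(pop.champion_f)
```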
21 changes: 12 additions & 9 deletions pyproject.toml
@@ -1,22 +1,25 @@
[project]
authors = [
{name = "Johannes Schoenmaekers", email = "[email protected]"},
{name = "Moritz von Looz", email = "[email protected]"},
]
dependencies = [
"pygmo >=2.16.0",
"numpy<2.0.0",
{ name = "Johannes Schoenmaekers", email = "[email protected]" },
{ name = "Moritz von Looz", email = "[email protected]" },
]
dependencies = ["pygmo >=2.16.0", "numpy<2.0.0"]
description = "A python-wrapped version of OPTGRA, an algorithm for constrained optimization"
license = {text = "GPL-3.0 or ESCL-2.4"}
license = { text = "GPL-3.0 or ESCL-2.4" }
name = "pyoptgra"
readme = "README.rst"
requires-python = ">=3.9"
version = "1.0.1"
version = "1.1.0"

[build-system]
build-backend = "scikit_build_core.build"
requires = ["setuptools", "wheel", "scikit-build-core", "ninja", "setuptools_scm"]
requires = [
"setuptools",
"wheel",
"scikit-build-core",
"ninja",
"setuptools_scm",
]

[tool.scikit-build.wheel]
install-dir = "pyoptgra/core"