diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e3fc4d70..71ca6094 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,17 @@ Changelog ========= +0.7 (2017-11-30) +---------------- + +- Added the MaxVar acquisition method +- Added the RandMaxVar acquisition method +- Added the ExpIntVar acquisition method +- Implemented the Two Stage Procedure, a method of summary-statistics diagnostics +- Added new example: the stochastic Lotka-Volterra model +- Fix methods.bo.utils.minimize to be strictly within bounds +- Fix elfi.Distance to support scipy 1.0.0 + 0.6.3 (2017-09-28) ------------------ diff --git a/README.md b/README.md index cf23be66..fc6c944f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -**Version 0.6.3 released!** See the CHANGELOG and [notebooks](https://github.com/elfi-dev/notebooks). +**Version 0.7 released!** See the CHANGELOG and [notebooks](https://github.com/elfi-dev/notebooks). **NOTE:** For the time being NetworkX 2 is incompatible with ELFI. diff --git a/docs/api.rst b/docs/api.rst index 7db39631..913f4139 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -7,6 +7,8 @@ Modelling API ------------- Below is the API for creating generative models. +.. currentmodule:: . + .. autosummary:: elfi.ElfiModel @@ -28,16 +30,12 @@ Below is the API for creating generative models. **Other** -.. currentmodule:: elfi.model.elfi_model - .. autosummary:: elfi.new_model elfi.load_model elfi.get_default_model elfi.set_default_model -.. currentmodule:: elfi.visualization.visualization - .. autosummary:: elfi.draw @@ -52,6 +50,7 @@ Below is a list of inference methods included in ELFI. elfi.BayesianOptimization elfi.BOLFI + **Result objects** .. currentmodule:: elfi.methods.results @@ -62,9 +61,10 @@ Below is a list of inference methods included in ELFI. SmcSample BolfiSample + **Post-processing** -.. currentmodule:: elfi +.. currentmodule:: . .. autosummary:: elfi.adjust_posterior @@ -74,11 +74,33 @@ Below is a list of inference methods included in ELFI. .. autosummary:: LinearAdjustment + +**Diagnostics** + +.. currentmodule:: . + +.. autosummary:: + elfi.TwoStageSelection + + +**Acquisition methods** + +.. currentmodule:: elfi.methods.bo.acquisition + +.. autosummary:: + LCBSC + MaxVar + RandMaxVar + ExpIntVar + UniformAcquisition + Other ----- **Data pools** +.. currentmodule:: . + .. autosummary:: elfi.OutputPool elfi.ArrayPool @@ -86,8 +108,6 @@ Other **Module functions** -.. currentmodule:: elfi - .. autosummary:: elfi.get_client elfi.set_client @@ -102,6 +122,7 @@ Other elfi.tools.external_operation + Class documentations -------------------- @@ -146,17 +167,15 @@ Modelling API classes **Other** -.. currentmodule:: elfi.model.elfi_model +.. autofunction:: elfi.new_model -.. automethod:: elfi.new_model +.. autofunction:: elfi.load_model -.. automethod:: elfi.get_current_model +.. autofunction:: elfi.get_default_model -.. automethod:: elfi.set_current_model +.. autofunction:: elfi.set_default_model -.. currentmodule:: elfi.visualization.visualization - -.. automethod:: elfi.visualization.visualization.nx_draw +.. autofunction:: elfi.draw .. This would show undocumented members :undoc-members: @@ -180,11 +199,11 @@ Inference API classes :members: :inherited-members: -.. currentmodule:: elfi.methods.results - **Result objects** +.. currentmodule:: elfi.methods.results + .. autoclass:: OptimizationResult :members: :inherited-members: @@ -204,9 +223,9 @@ Inference API classes **Post-processing** -.. currentmodule:: elfi +.. currentmodule:: . -.. 
automethod:: elfi.adjust_posterior +.. autofunction:: elfi.adjust_posterior .. currentmodule:: elfi.methods.post_processing @@ -214,12 +233,45 @@ Inference API classes :members: :inherited-members: +**Diagnostics** + +.. currentmodule:: . + +.. autoclass:: elfi.TwoStageSelection + :members: + :inherited-members: + +**Acquisition methods** + +.. currentmodule:: elfi.methods.bo.acquisition + +.. autoclass:: LCBSC + :members: + :inherited-members: + +.. autoclass:: MaxVar + :members: + :inherited-members: + +.. autoclass:: RandMaxVar + :members: + :inherited-members: + +.. autoclass:: ExpIntVar + :members: + :inherited-members: + +.. autoclass:: UniformAcquisition + :members: + :inherited-members: Other ..... **Data pools** +.. currentmodule:: . + .. autoclass:: elfi.OutputPool :members: :inherited-members: @@ -231,11 +283,9 @@ Other **Module functions** -.. currentmodule:: elfi - -.. automethod:: elfi.get_client +.. autofunction:: elfi.get_client -.. automethod:: elfi.set_client +.. autofunction:: elfi.set_client **Tools** diff --git a/docs/conf.py b/docs/conf.py index c092154d..402ca86f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,9 +30,9 @@ def __getattr__(cls, name): MOCK_MODULES = [ 'pygtk', 'gtk', 'gobject', 'argparse', 'numpy', 'pandas', 'scipy', 'unqlite', 'dask', 'distributed', 'distributed.client', 'graphviz', 'matplotlib', 'sobol_seq', 'GPy', - 'dask.delayed', 'scipy.optimize', 'scipy.stats', 'scipy.spatial', 'scipy.sparse', - 'matplotlib.pyplot', 'numpy.random', 'networkx', 'ipyparallel', 'numpy.lib', - 'numpy.lib.format', 'sklearn.linear_model' + 'dask.delayed', 'scipy.linalg', 'scipy.optimize', 'scipy.stats', 'scipy.spatial', + 'scipy.sparse', 'scipy.special', 'matplotlib.pyplot', 'numpy.random', 'networkx', + 'ipyparallel', 'numpy.lib', 'numpy.lib.format', 'sklearn.linear_model' ] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) diff --git a/docs/usage/external.rst b/docs/usage/external.rst index d6c545e2..6eabeecd 100644 --- a/docs/usage/external.rst +++ b/docs/usage/external.rst @@ -26,7 +26,8 @@ Let's begin by importing some libraries that we will be using: import scipy.stats as ss import elfi - import elfi.examples + import elfi.examples.bdm + import elfi.examples.ma2 %matplotlib inline diff --git a/docs/usage/parallelization.rst b/docs/usage/parallelization.rst index 8e7faeaf..763eb125 100644 --- a/docs/usage/parallelization.rst +++ b/docs/usage/parallelization.rst @@ -252,4 +252,3 @@ Remember to stop the ipcluster when done 2017-07-19 16:20:58.662 [IPClusterStop] Stopping cluster [pid=21020] with [signal=] - diff --git a/elfi/__init__.py b/elfi/__init__.py index c1b61669..5187b668 100644 --- a/elfi/__init__.py +++ b/elfi/__init__.py @@ -11,6 +11,7 @@ import elfi.methods.mcmc import elfi.model.tools as tools from elfi.client import get_client, set_client +from elfi.methods.diagnostics import TwoStageSelection from elfi.methods.parameter_inference import * from elfi.methods.post_processing import adjust_posterior from elfi.model.elfi_model import * @@ -23,4 +24,4 @@ __email__ = 'elfi-support@hiit.fi' # make sure __version_ is on the last non-empty line (read by setup.py) -__version__ = '0.6.3' +__version__ = '0.7' diff --git a/elfi/examples/bignk.py b/elfi/examples/bignk.py index d7feeca7..0e145c96 100644 --- a/elfi/examples/bignk.py +++ b/elfi/examples/bignk.py @@ -1,4 +1,4 @@ -"""An example implementation of the univariate g-and-k model.""" +"""Implementation of the bivariate g-and-k example model.""" from functools import partial @@ -6,160 
+6,154 @@ import scipy.stats as ss import elfi -from elfi.examples.gnk import euclidean_multidim, ss_octile, ss_order, ss_robust +from elfi.examples.gnk import euclidean_multiss, ss_robust -EPS = np.finfo(float).eps - -def BiGNK(a1, a2, b1, b2, g1, g2, k1, k2, rho, c=.8, n_obs=150, batch_size=1, - random_state=None): - """Sample the bi g-and-k distribution. +def BiGNK(A1, A2, B1, B2, g1, g2, k1, k2, rho, c=.8, n_obs=150, batch_size=1, random_state=None): + """Sample the bivariate g-and-k distribution. References ---------- - [1] Drovandi, Christopher C., and Anthony N. Pettitt. "Likelihood-free - Bayesian estimation of multivariate quantile distributions." - Computational Statistics & Data Analysis 55.9 (2011): 2541-2556. - [2] Allingham, David, R. AR King, and Kerrie L. Mengersen. "Bayesian - estimation of quantile distributions."Statistics and Computing 19.2 - (2009): 189-201. + [1] Drovandi, C. C., & Pettitt, A. N. (2011). + Likelihood-free Bayesian estimation of multivariate quantile distributions. + Computational Statistics & Data Analysis, 55(9), 2541-2556. + [2] Allingham, D., King, R. A. R., & Mengersen, K. L. (2009). + Bayesian estimation of quantile distributions. + Statistics and Computing, 19(2), 189-201. + + The quantile function of g-and-k distribution is defined as follows: + + Q_{gnk} = A + B * (1 + c * (1 - exp(-g * z(p)) / 1 + exp(-g * z(p)))) + * (1 + z(p)^2)^k * z(p), where + + z(p) is the p-th standard normal quantile. + + To sample from the g-and-k distribution, draw z(p) ~ N(0, 1) and evaluate Q_{gnk}. Parameters ---------- - a1 : float or array_like - The location (the 1st dimension). - a2 : float or array_like - The location (the 2nd dimension). - b1 : float or array_like - The scale (the 1st dimension). - b2 : float or array_like - The scale (the 2nd dimension). + A1 : float or array_like + Location parameter (the 1st dimension). + A2 : float or array_like + Location parameter (the 2nd dimension). + B1 : float or array_like + Scale parameter (the 1st dimension). + B2 : float or array_like + Scale parameter (the 2nd dimension). g1 : float or array_like - The skewness (the 1st dimension). + Skewness parameter (the 1st dimension). g2 : float or array_like - The skewness (the 2nd dimension). + Skewness parameter (the 2nd dimension). k1 : float or array_like - The kurtosis (the 1st dimension). + Kurtosis parameter (the 1st dimension). k2 : float or array_like - The kurtosis (the 2nd dimension). + Kurtosis parameter (the 2nd dimension). rho : float or array_like - The dependence between components (dimensions), [1]. + Parameters' covariance. c : float, optional - The overall asymmetry parameter, as a convention fixed to 0.8 [2]. + Overall asymmetry parameter, by default fixed to 0.8 as in Allingham et al. (2009). n_obs : int, optional - The number of the observed points batch_size : int, optional random_state : np.random.RandomState, optional Returns ------- array_like - The yielded points. + Yielded points. """ - # Standardising the parameters - a1 = np.asanyarray(a1).reshape((-1, 1)) - a2 = np.asanyarray(a2).reshape((-1, 1)) - b1 = np.asanyarray(b1).reshape((-1, 1)) - b2 = np.asanyarray(b2).reshape((-1, 1)) + # Transforming the arrays' shape to be compatible with batching. 
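+    # Scalar inputs become column vectors of shape (batch_size, 1); k1 and k2 get an
+    # extra trailing axis so that the kurtosis can later be repeated over n_obs.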
+ A1 = np.asanyarray(A1).reshape((-1, 1)) + A2 = np.asanyarray(A2).reshape((-1, 1)) + B1 = np.asanyarray(B1).reshape((-1, 1)) + B2 = np.asanyarray(B2).reshape((-1, 1)) g1 = np.asanyarray(g1).reshape((-1, 1)) g2 = np.asanyarray(g2).reshape((-1, 1)) k1 = np.asanyarray(k1).reshape((-1, 1, 1)) k2 = np.asanyarray(k2).reshape((-1, 1, 1)) rho = np.asanyarray(rho).reshape((-1, 1)) - a = np.hstack((a1, a2)) - b = np.hstack((b1, b2)) + + # Merging the multi-dimensional parameters. + A = np.hstack((A1, A2)) + B = np.hstack((B1, B2)) g = np.hstack((g1, g2)) k = np.hstack((k1, k2)) - # Sampling from the z term, Equation 3 [1]. - z = [] + # Obtaining z(p) ~ N(0, 1). + z_batches = [] for i in range(batch_size): + # Initialising a separate covariance matrix for each batch. matrix_cov = np.array([[1, rho[i]], [rho[i], 1]]) - z_el = ss.multivariate_normal.rvs(cov=matrix_cov, - size=(n_obs), - random_state=random_state) - z.append(z_el) - z = np.array(z) - # Obtaining the first bracket term, Equation 3 [1]. + z_batch = ss.multivariate_normal.rvs(cov=matrix_cov, size=n_obs, random_state=random_state) + z_batches.append(z_batch) + z = np.array(z_batches) + + # Obtaining the first bracket term of the quantile function Q_{gnk}. gdotz = np.einsum('ik,ijk->ijk', g, z) term_exp = (1 - np.exp(-gdotz)) / (1 + np.exp(-gdotz)) - term_first = np.einsum('ik,ijk->ijk', b, (1 + c * (term_exp))) + term_first = np.einsum('ik,ijk->ijk', B, (1 + c * (term_exp))) - # Obtaining the second bracket term, Equation 3 [1]. + # Obtaining the second bracket term, of the quantile function Q_{gnk}. term_second_unraised = 1 + np.power(z, 2) k = np.repeat(k, n_obs, axis=2) k = np.swapaxes(k, 1, 2) term_second = np.power(term_second_unraised, k) - # Yielding Equation 3, [1]. + # Evaluating the quantile function Q_{gnk}. term_product = term_first * term_second * z term_product_misaligned = np.swapaxes(term_product, 1, 0) - y_misaligned = np.add(a, term_product_misaligned) - y = np.swapaxes(y_misaligned, 1, 0) - - return y + y_misaligned = np.add(A, term_product_misaligned) + y_obs = np.swapaxes(y_misaligned, 1, 0) + return y_obs -def get_model(n_obs=150, true_params=None, stats_summary=None, seed_obs=None): +def get_model(n_obs=150, true_params=None, seed=None): """Return an initialised bivariate g-and-k model. Parameters ---------- n_obs : int, optional - The number of the observed points. + Number of the observations. true_params : array_like, optional - The parameters defining the model. - stats_summary : array_like, optional - The chosen summary statistics, expressed as a list of strings. - Options: ['ss_order'], ['ss_robust'], ['ss_octile']. - seed_obs : np.random.RandomState, optional + Parameters defining the model. + seed : np.random.RandomState, optional Returns ------- elfi.ElfiModel """ - m = elfi.ElfiModel() + m = elfi.new_model() - # Initialising the default parameter settings as given in [1]. + # Initialising the parameters as in Drovandi & Pettitt (2011). if true_params is None: true_params = [3, 4, 1, 0.5, 1, 2, .5, .4, 0.6] - if stats_summary is None: - stats_summary = ['ss_robust'] - - # Initialising the default prior settings as given in [1]. 
- elfi.Prior('uniform', 0, 5, model=m, name='a1') - elfi.Prior('uniform', 0, 5, model=m, name='a2') - elfi.Prior('uniform', 0, 5, model=m, name='b1') - elfi.Prior('uniform', 0, 5, model=m, name='b2') - elfi.Prior('uniform', -5, 10, model=m, name='g1') - elfi.Prior('uniform', -5, 10, model=m, name='g2') - elfi.Prior('uniform', -.5, 5.5, model=m, name='k1') - elfi.Prior('uniform', -.5, 5.5, model=m, name='k2') - elfi.Prior('uniform', -1 + EPS, 2 - 2 * EPS, model=m, name='rho') - - # Generating the observations. - y_obs = BiGNK(*true_params, n_obs=n_obs, - random_state=np.random.RandomState(seed_obs)) + + # Initialising the prior settings as in Drovandi & Pettitt (2011). + priors = [] + priors.append(elfi.Prior('uniform', 0, 5, model=m, name='a1')) + priors.append(elfi.Prior('uniform', 0, 5, model=m, name='a2')) + priors.append(elfi.Prior('uniform', 0, 5, model=m, name='b1')) + priors.append(elfi.Prior('uniform', 0, 5, model=m, name='b2')) + priors.append(elfi.Prior('uniform', -5, 10, model=m, name='g1')) + priors.append(elfi.Prior('uniform', -5, 10, model=m, name='g2')) + priors.append(elfi.Prior('uniform', -.5, 5.5, model=m, name='k1')) + priors.append(elfi.Prior('uniform', -.5, 5.5, model=m, name='k2')) + EPS = np.finfo(float).eps + priors.append(elfi.Prior('uniform', -1 + EPS, 2 - 2 * EPS, model=m, name='rho')) + + # Obtaining the observations. + y_obs = BiGNK(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed)) # Defining the simulator. - fn_sim = partial(BiGNK, n_obs=n_obs) - elfi.Simulator(fn_sim, m['a1'], m['a2'], m['b1'], m['b2'], m['g1'], - m['g2'], m['k1'], m['k2'], m['rho'], observed=y_obs, - name='BiGNK') - - # Initialising the chosen summary statistics. - fns_summary_all = [ss_order, ss_robust, ss_octile] - fns_summary_chosen = [] - for fn_summary in fns_summary_all: - if fn_summary.__name__ in stats_summary: - summary = elfi.Summary(fn_summary, m['BiGNK'], - name=fn_summary.__name__) - fns_summary_chosen.append(summary) - - # Defining the distance metric. - elfi.Discrepancy(euclidean_multidim, *fns_summary_chosen, name='d') + fn_simulator = partial(BiGNK, n_obs=n_obs) + elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='BiGNK') + + # Initialising the default summary statistics. + default_ss = elfi.Summary(ss_robust, m['BiGNK'], name='ss_robust') + # Using the customEuclidean distance function designed for + # the summary statistics of shape (batch_size, dim_ss, dim_ss_point). + elfi.Discrepancy(euclidean_multiss, default_ss, name='d') return m diff --git a/elfi/examples/gauss.py b/elfi/examples/gauss.py index 89557cc8..efe1a0d4 100644 --- a/elfi/examples/gauss.py +++ b/elfi/examples/gauss.py @@ -1,4 +1,4 @@ -"""Example implementations of Gaussian noise models.""" +"""Implementations of Gaussian noise example models.""" from functools import partial @@ -6,7 +6,6 @@ import scipy.stats as ss import elfi -from elfi.examples.gnk import euclidean_multidim def gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None): @@ -22,11 +21,11 @@ def gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None): Returns ------- - y_obs : array_like - 1-D observation. + array_like + 1-D observations. """ - # Handling batching. + # Transforming the arrays' shape to be compatible with batching. batches_mu = np.asanyarray(mu).reshape((-1, 1)) batches_sigma = np.asanyarray(sigma).reshape((-1, 1)) @@ -51,13 +50,13 @@ def gauss_nd_mean(*mu, cov_matrix, n_obs=15, batch_size=1, random_state=None): Returns ------- - y_obs : array_like - n-D observation. 
+ array_like + n-D observations. """ n_dim = len(mu) - # Handling batching. + # Transforming the arrays' shape to be compatible with batching. batches_mu = np.zeros(shape=(batch_size, n_dim)) for idx_dim, param_mu in enumerate(mu): batches_mu[:, idx_dim] = param_mu @@ -65,28 +64,14 @@ def gauss_nd_mean(*mu, cov_matrix, n_obs=15, batch_size=1, random_state=None): # Sampling the observations. y_obs = np.zeros(shape=(batch_size, n_obs, n_dim)) for idx_batch in range(batch_size): - y_batch = ss.multivariate_normal.rvs(mean=batches_mu[idx_batch], - cov=cov_matrix, - size=n_obs, - random_state=random_state) + y_batch = ss.multivariate_normal.rvs(mean=batches_mu[idx_batch], cov=cov_matrix, + size=n_obs, random_state=random_state) if n_dim == 1: y_batch = y_batch[:, np.newaxis] y_obs[idx_batch, :, :] = y_batch return y_obs -def ss_mean(x): - """Return the summary statistic corresponding to the mean.""" - ss = np.mean(x, axis=1) - return ss - - -def ss_var(x): - """Return the summary statistic corresponding to the variance.""" - ss = np.var(x, axis=1) - return ss - - def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matrix=None): """Return a Gaussian noise model. @@ -99,12 +84,12 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matr Seed for the observed data generation. nd_mean : bool, optional Option to use an n-D mean Gaussian noise model. - cov_matrix : None, optional + cov_matrix : array_like, optional Covariance matrix, a requirement for the nd_mean model. Returns ------- - m : elfi.ElfiModel + elfi.ElfiModel """ # Defining the default settings. @@ -116,15 +101,14 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matr # Choosing the simulator for both observations and simulations. if nd_mean: - sim_fn = partial(gauss_nd_mean, cov_matrix=cov_matrix, n_obs=n_obs) + fn_simulator = partial(gauss_nd_mean, cov_matrix=cov_matrix, n_obs=n_obs) else: - sim_fn = partial(gauss, n_obs=n_obs) + fn_simulator = partial(gauss, n_obs=n_obs) # Obtaining the observations. - y_obs = sim_fn(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs)) - - m = elfi.ElfiModel() + y_obs = fn_simulator(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs)) + m = elfi.new_model() # Initialising the priors. eps_prior = 5 # The longest distance from the median of an initialised prior's distribution. priors = [] @@ -138,10 +122,9 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matr else: priors.append(elfi.Prior('uniform', true_params[0] - eps_prior, 2 * eps_prior, model=m, name='mu')) - priors.append(elfi.Prior('truncnorm', - np.amax([.01, true_params[1] - eps_prior]), + priors.append(elfi.Prior('truncnorm', np.amax([.01, true_params[1] - eps_prior]), 2 * eps_prior, model=m, name='sigma')) - elfi.Simulator(sim_fn, *priors, observed=y_obs, name='gauss') + elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='gauss') # Initialising the summary statistics. sumstats = [] @@ -153,5 +136,63 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matr elfi.Discrepancy(euclidean_multidim, *sumstats, name='d') else: elfi.Distance('euclidean', *sumstats, name='d') - return m + + +def ss_mean(y): + """Obtain the mean summary statistic. + + Parameters + ---------- + y : array_like + Yielded points. 
+ + Returns + ------- + array_like of the shape (batch_size, dim_point) + + """ + ss = np.mean(y, axis=1) + return ss + + +def ss_var(y): + """Return the variance summary statistic. + + Parameters + ---------- + y : array_like + Yielded points. + + Returns + ------- + array_like of the shape (batch_size, dim_point) + + """ + ss = np.var(y, axis=1) + return ss + + +def euclidean_multidim(*simulated, observed): + """Calculate the Euclidean distances merging data dimensions. + + The shape of the input arrays corresponds to (batch_size, dim_point). + + Parameters + ---------- + *simulated: array_like + observed : array_like + + Returns + ------- + array_like + + """ + pts_sim = simulated[0] + pts_obs = observed[0] + + # Integrating over the summary statistics. + d_dim_merged = np.sum((pts_sim - pts_obs)**2., axis=1) + + d = np.sqrt(d_dim_merged) + return d diff --git a/elfi/examples/gnk.py b/elfi/examples/gnk.py index 80337c0c..b022c4bc 100644 --- a/elfi/examples/gnk.py +++ b/elfi/examples/gnk.py @@ -1,4 +1,4 @@ -"""An example implementation of the bivariate g-and-k model.""" +"""Implementation of the univariate g-and-k example model.""" from functools import partial @@ -7,249 +7,243 @@ import elfi -EPS = np.finfo(float).eps - -def GNK(a, b, g, k, c=0.8, n_obs=50, batch_size=1, random_state=None): +def GNK(A, B, g, k, c=0.8, n_obs=50, batch_size=1, random_state=None): """Sample the univariate g-and-k distribution. References ---------- - [1] Drovandi, Christopher C., and Anthony N. Pettitt. "Likelihood-free - Bayesian estimation of multivariate quantile distributions." - Computational Statistics & Data Analysis 55.9 (2011): 2541-2556. - [2] Allingham, David, R. AR King, and Kerrie L. Mengersen. "Bayesian - estimation of quantile distributions."Statistics and Computing 19.2 - (2009): 189-201. + [1] Drovandi, C. C., & Pettitt, A. N. (2011). + Likelihood-free Bayesian estimation of multivariate quantile distributions. + Computational Statistics & Data Analysis, 55(9), 2541-2556. + [2] Allingham, D., King, R. A. R., & Mengersen, K. L. (2009). + Bayesian estimation of quantile distributions. + Statistics and Computing, 19(2), 189-201. + + The quantile function of g-and-k distribution is defined as follows: + + Q_{gnk} = A + B * (1 + c * (1 - exp(-g * z(p)) / 1 + exp(-g * z(p)))) + * (1 + z(p)^2)^k * z(p), where + + z(p) is the p-th standard normal quantile. + + To sample from the g-and-k distribution, draw z(p) ~ N(0, 1) and evaluate Q_{gnk}. Parameters ---------- - a : float or array_like - The location. - b : float or array_like - The scale. + A : float or array_like + Location parameter. + B : float or array_like + Scale parameter. g : float or array_like - The skewness. + Skewness parameter. k : float or array_like - The kurtosis. + Kurtosis parameter. c : float, optional - The overall asymmetry parameter, as a convention fixed to 0.8 [2]. + Overall asymmetry parameter, by default fixed to 0.8 as in Allingham et al. (2009). n_obs : int, optional - The number of the observed points batch_size : int, optional random_state : np.random.RandomState, optional Returns ------- array_like - The yielded points. + Yielded points (the array's shape corresponds to (batch_size, n_points, n_dims). """ - # Standardising the parameters - a = np.asanyarray(a).reshape((-1, 1)) - b = np.asanyarray(b).reshape((-1, 1)) + # Transforming the arrays' shape to be compatible with batching. 
+ A = np.asanyarray(A).reshape((-1, 1)) + B = np.asanyarray(B).reshape((-1, 1)) g = np.asanyarray(g).reshape((-1, 1)) k = np.asanyarray(k).reshape((-1, 1)) - # Sampling from the z term, Equation 1, [2]. + # Obtaining z(p) ~ N(0, 1). z = ss.norm.rvs(size=(batch_size, n_obs), random_state=random_state) - # Yielding Equation 1, [2]. - term_exp = (1 - np.exp(-g * z)) / (1 + np.exp(-g * z)) - y = a + b * (1 + c * (term_exp)) * (1 + z**2)**k * z + # Evaluating the quantile function Q_{gnk}. + y = A + B * (1 + c * ((1 - np.exp(-g * z)) / (1 + np.exp(-g * z)))) * (1 + z**2)**k * z - # Dedicating an axis for the data dimensionality. - y = np.expand_dims(y, axis=2) + # Dedicating a dummy axis for the dimensionality of the points. + y = y[:, :, np.newaxis] return y -def get_model(n_obs=50, true_params=None, stats_summary=None, seed_obs=None): - """Return an initialised univariate g-and-k model. +def get_model(n_obs=50, true_params=None, seed=None): + """Initialise the g-and-k model. Parameters ---------- n_obs : int, optional - The number of the observed points. + Number of the observations. true_params : array_like, optional - The parameters defining the model. - stats_summary : array_like, optional - The chosen summary statistics, expressed as a list of strings. - Options: ['ss_order'], ['ss_robust'], ['ss_octile']. - seed_obs : np.random.RandomState, optional + Parameters defining the model. + seed : np.random.RandomState, optional Returns ------- elfi.ElfiModel """ - m = elfi.ElfiModel() + m = elfi.new_model() - # Initialising the default parameter settings as given in [2]. + # Initialising the parameters as in Allingham et al. (2009). if true_params is None: true_params = [3, 1, 2, .5] - if stats_summary is None: - stats_summary = ['ss_order'] - # Initialising the default prior settings as given in [2]. - elfi.Prior('uniform', 0, 10, model=m, name='a') - elfi.Prior('uniform', 0, 10, model=m, name='b') - elfi.Prior('uniform', 0, 10, model=m, name='g') - elfi.Prior('uniform', 0, 10, model=m, name='k') + # Initialising the prior settings as in Allingham et al. (2009). + priors = [] + priors.append(elfi.Prior('uniform', 0, 10, model=m, name='A')) + priors.append(elfi.Prior('uniform', 0, 10, model=m, name='B')) + priors.append(elfi.Prior('uniform', 0, 10, model=m, name='g')) + priors.append(elfi.Prior('uniform', 0, 10, model=m, name='k')) - # Generating the observations. - y_obs = GNK(*true_params, n_obs=n_obs, - random_state=np.random.RandomState(seed_obs)) + # Obtaining the observations. + y_obs = GNK(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed)) # Defining the simulator. - fn_sim = partial(GNK, n_obs=n_obs) - elfi.Simulator(fn_sim, m['a'], m['b'], m['g'], m['k'], observed=y_obs, - name='GNK') - - # Initialising the chosen summary statistics. - fns_summary_all = [ss_order, ss_robust, ss_octile] - fns_summary_chosen = [] - for fn_summary in fns_summary_all: - if fn_summary.__name__ in stats_summary: - summary = elfi.Summary(fn_summary, m['GNK'], - name=fn_summary.__name__) - fns_summary_chosen.append(summary) + fn_simulator = partial(GNK, n_obs=n_obs) + elfi.Simulator(fn_simulator, *priors, observed=y_obs, name='GNK') - elfi.Discrepancy(euclidean_multidim, *fns_summary_chosen, name='d') + # Initialising the summary statistics as in Allingham et al. (2009). + default_ss = elfi.Summary(ss_order, m['GNK'], name='ss_order') + # Using the multi-dimensional Euclidean distance function as + # the summary statistics' implementations are designed for multi-dimensional cases. 
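+    # Here ss_order yields arrays of shape (batch_size, dim_ss, dim_ss_point),
+    # the layout that euclidean_multiss sums over.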
+ elfi.Discrepancy(euclidean_multiss, default_ss, name='d') return m -def euclidean_multidim(*simulated, observed): - """Calculate the multi-dimensional Euclidean distance. +def euclidean_multiss(*simulated, observed): + """Calculate the Euclidean distances merging summary statistics. + + The shape of the arrays corresponds to (batch_size, dim_ss, dim_ss_point), where + dim_ss corresponds to the dimensionality of the summary statistics, and + dim_ss_point corresponds to the dimensionality a summary statistic data point. Parameters ---------- *simulated: array_like - The simulated points. observed : array_like - The observed points. Returns ------- array_like """ - pts_sim = np.stack(simulated, axis=1) - pts_obs = np.stack(observed, axis=1) + pts_sim = simulated[0] + pts_obs = observed[0] + + # Integrating over the summary statistics. d_ss_merged = np.sum((pts_sim - pts_obs)**2., axis=1) - d_dim_merged = np.sum(d_ss_merged, axis=1) - d = np.sqrt(d_dim_merged) + # Integrating over the summary statistics' data point dimensionality. + d_ss_point_merged = np.sum(d_ss_merged, axis=1) + + d = np.sqrt(d_ss_point_merged) return d def ss_order(y): - """Obtain the order summary statistic, [2]. + """Obtain the order summary statistic described in Allingham et al. (2009). - The statistic reaches an optimal performance upon a low number of - observations. + The statistic reaches the optimal performance upon a low number of observations. Parameters ---------- y : array_like - The yielded points. + Yielded points. Returns ------- - array_like + array_like of the shape (batch_size, dim_ss=len(y), dim_ss_point) """ ss_order = np.sort(y) - return ss_order def ss_robust(y): - """Obtain the robust summary statistic, [1]. + """Obtain the robust summary statistic described in Drovandi and Pettitt (2011). - The statistic reaches an optimal performance upon a high number of + The statistic reaches the optimal performance upon a high number of observations. Parameters ---------- y : array_like - The yielded points. + Yielded points. Returns ------- - array_like + array_like of the shape (batch_size, dim_ss=4, dim_ss_point) """ - ss_a = _get_ss_a(y) - ss_b = _get_ss_b(y) + ss_A = _get_ss_A(y) + ss_B = _get_ss_B(y) ss_g = _get_ss_g(y) ss_k = _get_ss_k(y) - # Combining the summary statistics by expanding the dimensionality. - ss_robust = np.hstack((ss_a, ss_b, ss_g, ss_k)) + # Combining the summary statistics. + ss_robust = np.hstack((ss_A, ss_B, ss_g, ss_k)) + ss_robust = ss_robust[:, :, np.newaxis] return ss_robust def ss_octile(y): """Obtain the octile summary statistic. - The statistic reaches an optimal performance upon a high number of - observations. As reported in [1], it is more stable than ss_robust. + The statistic reaches the optimal performance upon a high number of + observations. According to Allingham et al. (2009), it is more stable than ss_robust. Parameters ---------- y : array_like - The yielded points. + Yielded points. Returns ------- - array_like + array_like of the shape (batch_size, dim_ss=8, dim_ss_point) """ octiles = np.linspace(12.5, 87.5, 7) E1, E2, E3, E4, E5, E6, E7 = np.percentile(y, octiles, axis=1) - # Combining the summary statistics by expanding the dimensionality. + # Combining the summary statistics. 
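+    # Each octile E_i has shape (batch_size, n_dim), so stacking the seven of them and
+    # adding a trailing axis below gives (batch_size, 7 * n_dim, 1).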
ss_octile = np.hstack((E1, E2, E3, E4, E5, E6, E7)) - + ss_octile = ss_octile[:, :, np.newaxis] return ss_octile -def _get_ss_a(y): +def _get_ss_A(y): L2 = np.percentile(y, 50, axis=1) - ss_a = L2 - - return ss_a + ss_A = L2 + return ss_A -def _get_ss_b(y): +def _get_ss_B(y): L1, L3 = np.percentile(y, [25, 75], axis=1) - ss_b = L3 - L1 - # Adjusting the zero values to avoid division issues. - ss_b_ravelled = ss_b.ravel() - idxs_zero = np.where(ss_b_ravelled == 0)[0] - ss_b_ravelled[idxs_zero] += EPS + # Avoiding the zero value (ss_B is used for division). + ss_B = (L3 - L1).ravel() + idxs_zero = np.where(ss_B == 0)[0] + ss_B[idxs_zero] += np.finfo(float).eps + + # Transforming the summary statistics back into the compatible shape. n_dim = y.shape[-1] n_batches = y.shape[0] - ss_b = ss_b_ravelled.reshape(n_batches, n_dim) - - return ss_b + ss_B = ss_B.reshape(n_batches, n_dim) + return ss_B def _get_ss_g(y): L1, L2, L3 = np.percentile(y, [25, 50, 75], axis=1) - - ss_b = _get_ss_b(y) - ss_g = np.divide(L3 + L1 - 2 * L2, ss_b) - + ss_B = _get_ss_B(y) + ss_g = np.divide(L3 + L1 - 2 * L2, ss_B) return ss_g def _get_ss_k(y): E1, E3, E5, E7 = np.percentile(y, [12.5, 37.5, 62.5, 87.5], axis=1) - - ss_b = _get_ss_b(y) - ss_k = np.divide(E7 - E5 + E3 - E1, ss_b) - + ss_B = _get_ss_B(y) + ss_k = np.divide(E7 - E5 + E3 - E1, ss_B) return ss_k diff --git a/elfi/examples/lotka_volterra.py b/elfi/examples/lotka_volterra.py new file mode 100644 index 00000000..f9cc6482 --- /dev/null +++ b/elfi/examples/lotka_volterra.py @@ -0,0 +1,253 @@ +"""Example of inference with the stochastic Lotka-Volterra predator prey model. + +Treatment roughly follows: +Owen, J., Wilkinson, D. and Gillespie, C. (2015) Likelihood free inference for + Markov processes: a comparison. Statistical Applications in Genetics and + Molecular Biology, 14(2). +""" + +import logging +from functools import partial + +import numpy as np +import scipy.stats as ss + +import elfi + + +def lotka_volterra(r1, r2, r3, prey_init=50, predator_init=100, sigma=0., n_obs=16, time_end=30., + batch_size=1, random_state=None, return_full=False): + r"""Generate sequences from the stochastic Lotka-Volterra model. + + The Lotka-Volterra model is described by 3 reactions + + R1 : X1 -> 2X1 # prey reproduction + R2 : X1 + X2 -> 2X2 # predator hunts prey and reproduces + R3 : X2 -> 0 # predator death + + The system is solved using the Direct method. + + Gillespie, D. T. (1977) Exact stochastic simulation of coupled chemical reactions. + The Journal of Physical Chemistry 81 (25), 2340–2361. + Lotka, A. J. (1925) Elements of physical biology. Williams & Wilkins Baltimore. + Volterra, V. (1926) Fluctuations in the abundance of a species considered mathematically. + Nature 118, 558–560. + + + Parameters + ---------- + r1 : float or np.array + Rate of R1. + r2 : float or np.array + Rate of R2. + r3 : float or np.array + Rate of R3. + prey_init : int or np.array, optional + Initial number of prey. + predator_init : int or np.array, optional + Initial number of predators. + sigma : float or np.array, optional + Standard deviation of the Gaussian noise added to measurements. + n_obs : int, optional + Number of observations to return at integer frequency. + time_end : float, optional + Time allowed for reactions in the Direct method (not the wall time). + batch_size : int, optional + random_state : np.random.RandomState, optional + return_full : bool, optional + If True, return a tuple (observed_stock, observed_times, full_stock, full_times). 
+ + Returns + ------- + stock_obs : np.array + Observations in shape (batch_size, n_obs, 2). + + """ + random_state = random_state or np.random + + r1 = np.asanyarray(r1).reshape(-1) + r2 = np.asanyarray(r2).reshape(-1) + r3 = np.asanyarray(r3).reshape(-1) + prey_init = np.asanyarray(prey_init).reshape(-1) + predator_init = np.asanyarray(predator_init).reshape(-1) + sigma = np.asanyarray(sigma).reshape(-1) + + n_full = 20000 + stock = np.empty((batch_size, n_full, 2), dtype=np.int32) + stock[:, 0, 0] = prey_init + stock[:, 0, 1] = predator_init + stoichiometry = np.array([[1, 0], [-1, 1], [0, -1], [0, 0]], dtype=np.int32) + times = np.empty((batch_size, n_full)) + times[:, 0] = 0 + + # iterate until all in batch ok + ii = 0 + while np.any(times[:, ii] < time_end): + ii += 1 + + # increase the size of arrays if needed + if ii == n_full: + stock = np.concatenate((stock, np.empty((batch_size, n_full, 2))), axis=1) + times = np.concatenate((times, np.empty((batch_size, n_full))), axis=1) + n_full *= 2 + + # reaction probabilities + hazards = np.column_stack((r1 * stock[:, ii-1, 0], + r2 * stock[:, ii-1, 0] * stock[:, ii-1, 1], + r3 * stock[:, ii-1, 1])) + + with np.errstate(divide='ignore', invalid='ignore'): + inv_sum_hazards = 1. / np.sum(hazards, axis=1, keepdims=True) # inf if all dead + + delta_t = random_state.exponential(inv_sum_hazards.ravel()) + times[:, ii] = times[:, ii-1] + delta_t + + # choose reaction according to their probabilities + probs = hazards * inv_sum_hazards + cumprobs = np.cumsum(probs[:, :-1], axis=1) + x = random_state.uniform(size=(batch_size, 1)) + reaction = np.sum(x >= cumprobs, axis=1) + + # null reaction if both populations dead + reaction = np.where(np.isinf(inv_sum_hazards.ravel()), 3, reaction) + + # update stock + stock[:, ii, :] = stock[:, ii-1, :] + stoichiometry[reaction, :] + + # no point to continue if predators = 0 + times[:, ii] = np.where(stock[:, ii, 1] == 0, time_end, times[:, ii]) + + stock = stock[:, :ii+1, :] + times = times[:, :ii+1] + + times_out = np.linspace(0, time_end, n_obs) + stock_out = np.empty((batch_size, n_obs, 2), dtype=np.int32) + stock_out[:, 0, :] = stock[:, 0, :] + + # observations at even intervals + for ii in range(1, n_obs): + iy, ix = np.where(times >= times_out[ii]) + iy, iix = np.unique(iy, return_index=True) + ix = ix[iix] - 1 + time_term = (times_out[ii] - times[iy, ix]) / (times[iy, ix+1] - times[iy, ix]) + stock_out[:, ii, 0] = (stock[iy, ix+1, 0] - stock[iy, ix, 0]) * time_term \ + + stock[iy, ix, 0] + random_state.normal(scale=sigma, size=batch_size) + stock_out[:, ii, 1] = (stock[iy, ix+1, 1] - stock[iy, ix, 1]) * time_term \ + + stock[iy, ix, 1] + random_state.normal(scale=sigma, size=batch_size) + + if return_full: + return (stock_out, times_out, stock, times) + + return stock_out + + +def get_model(n_obs=50, true_params=None, seed_obs=None, stochastic=True): + """Return a complete Lotka-Volterra model in inference task. + + Parameters + ---------- + n_obs : int, optional + Number of observations. + true_params : list, optional + Parameters with which the observed data is generated. + seed_obs : int, optional + Seed for the observed data generation. + + Returns + ------- + m : elfi.ElfiModel + + """ + logger = logging.getLogger() + simulator = partial(lotka_volterra, n_obs=n_obs) + if true_params is None: + true_params = [1.0, 0.005, 0.6, 50, 100, 10.] 
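+        # In the order of the simulator's positional arguments:
+        # r1, r2, r3, prey_init, predator_init, sigma.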
+ + m = elfi.ElfiModel() + y_obs = simulator(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs)) + sim_fn = partial(simulator, n_obs=n_obs) + priors = [] + sumstats = [] + + priors.append(elfi.Prior(ExpUniform, -2, 0, model=m, name='r1')) + priors.append(elfi.Prior(ExpUniform, -5, -2.5, model=m, name='r2')) # easily kills populations + priors.append(elfi.Prior(ExpUniform, -2, 0, model=m, name='r3')) + priors.append(elfi.Prior('poisson', 50, model=m, name='prey0')) + priors.append(elfi.Prior('poisson', 100, model=m, name='predator0')) + priors.append(elfi.Prior(ExpUniform, np.log(0.5), np.log(50), model=m, name='sigma')) + + elfi.Simulator(sim_fn, *priors, observed=y_obs, name='LV') + sumstats.append(elfi.Summary(partial(pick_stock, species=0), m['LV'], name='prey')) + sumstats.append(elfi.Summary(partial(pick_stock, species=1), m['LV'], name='predator')) + elfi.Distance('sqeuclidean', *sumstats, name='d') + + logger.info("Generated %i observations with true parameters r1: %.1f, r2: %.3f, r3: %.1f, " + "prey0: %i, predator0: %i, sigma: %.1f.", n_obs, *true_params) + + return m + + +class ExpUniform(elfi.Distribution): + r"""Prior distribution for parameter. + + log x ~ Uniform(a,b) + pdf(x) \propto 1/x, if x \in [exp(a), exp(b)] + + """ + + @classmethod + def rvs(cls, a, b, size=1, random_state=None): + """Draw random variates. + + Parameters + ---------- + a : float or array-like + b : float or array-like + size : int, optional + random_state : RandomState, optional + + Returns + ------- + np.array + + """ + u = ss.uniform.rvs(loc=a, scale=b-a, size=size, random_state=random_state) + x = np.exp(u) + return x + + @classmethod + def pdf(cls, x, a, b): + """Density function at `x`. + + Parameters + ---------- + x : float or array-like + a : float or array-like + b : float or array-like + + Returns + ------- + np.array + + """ + with np.errstate(divide='ignore'): + p = np.where((x < np.exp(a)) | (x > np.exp(b)), 0, np.reciprocal(x)) + p /= (b - a) # normalize + return p + + +def pick_stock(stock, species): + """Return the stock for single species. + + Parameters + ---------- + stock : np.array + species : int + 0 for prey, 1 for predator. + + Returns + ------- + np.array + + """ + return stock[:, :, species] diff --git a/elfi/methods/bo/acquisition.py b/elfi/methods/bo/acquisition.py index 3a29db62..30ad82aa 100644 --- a/elfi/methods/bo/acquisition.py +++ b/elfi/methods/bo/acquisition.py @@ -3,8 +3,10 @@ import logging import numpy as np -from scipy.stats import truncnorm, uniform +import scipy.linalg as sl +import scipy.stats as ss +import elfi.methods.mcmc as mcmc from elfi.methods.bo.utils import minimize logger = logging.getLogger(__name__) @@ -63,6 +65,7 @@ def __init__(self, self.noise_var = noise_var self.exploration_rate = exploration_rate self.random_state = np.random if seed is None else np.random.RandomState(seed) + self.seed = 0 if seed is None else seed def evaluate(self, x, t=None): """Evaluate the acquisition function at 'x'. 
@@ -106,7 +109,7 @@ def acquire(self, n, t=None): The shape is (n, input_dim) """ - logger.debug('Acquiring the next batch of {} values'.format(n)) + logger.debug('Acquiring the next batch of %d values', n) # Optimize the current minimum def obj(x): @@ -145,7 +148,7 @@ def _add_noise(self, x): xi = x[:, i] a = (self.model.bounds[i][0] - xi) / std b = (self.model.bounds[i][1] - xi) / std - x[:, i] = truncnorm.rvs( + x[:, i] = ss.truncnorm.rvs( a, b, loc=xi, scale=std, size=len(x), random_state=self.random_state) return x @@ -201,6 +204,8 @@ def __init__(self, *args, delta=None, **kwargs): kwargs['exploration_rate'] = 1 / delta super(LCBSC, self).__init__(*args, **kwargs) + self.name = 'lcbsc' + self.label_fn = 'Confidence Bound' @property def delta(self): @@ -244,6 +249,492 @@ def evaluate_gradient(self, x, t=None): return grad_mean - 0.5 * grad_var * np.sqrt(self._beta(t) / var) +class MaxVar(AcquisitionBase): + r"""The maximum variance acquisition method. + + The next evaluation point is acquired in the maximiser of the variance of + the unnormalised approximate posterior. + + \theta_{t+1} = arg max Var(p(\theta) * p_a(\theta)), + + where the unnormalised likelihood p_a is defined + using the CDF of normal distribution, \Phi, as follows: + + p_a(\theta) = + (\Phi((\epsilon - \mu_{1:t}(\theta)) / \sqrt(v_{1:t}(\theta) + \sigma2_n))), + + where \epsilon is the ABC threshold, \mu_{1:t} and v_{1:t} are + determined by the Gaussian process, \sigma2_n is the noise. + + References + ---------- + [1] Järvenpää et al. (2017). arXiv:1704.00520 + [2] Gutmann M U, Corander J (2016). Bayesian Optimization for + Likelihood-Free Inference of Simulator-Based Statistical Models. + JMLR 17(125):1−47, 2016. http://jmlr.org/papers/v17/15-017.html + + """ + + def __init__(self, quantile_eps=.01, *args, **opts): + """Initialise MaxVar. + + Parameters + ---------- + quantile_eps : int, optional + Quantile of the observed discrepancies used in setting the ABC threshold. + + """ + super(MaxVar, self).__init__(*args, **opts) + self.name = 'max_var' + self.label_fn = 'Variance of the Unnormalised Approximate Posterior' + self.quantile_eps = quantile_eps + # The ABC threshold is initialised to a pre-set value as the gp is not yet fit. + self.eps = .1 + + def acquire(self, n, t=None): + """Acquire a batch of acquisition points. + + Parameters + ---------- + n : int + Number of acquisitions. + t : int, optional + Current iteration, (unused). + + Returns + ------- + array_like + Coordinates of the yielded acquisition points. + + """ + logger.debug('Acquiring the next batch of %d values', n) + gp = self.model + + # Updating the ABC threshold. + self.eps = np.percentile(gp.Y, self.quantile_eps * 100) + + def _negate_eval(theta): + return -self.evaluate(theta) + + def _negate_eval_grad(theta): + return -self.evaluate_gradient(theta) + + # Obtaining the location where the variance is maximised. + theta_max, _ = minimize(_negate_eval, + gp.bounds, + _negate_eval_grad, + self.prior, + self.n_inits, + self.max_opt_iters, + random_state=self.random_state) + + # Using the same location for all points in theta batch. + batch_theta = np.tile(theta_max, (n, 1)) + + return batch_theta + + def evaluate(self, theta_new, t=None): + """Evaluate the acquisition function at the location theta_new. + + Parameters + ---------- + theta_new : array_like + Evaluation coordinates. + t : int, optional + Current iteration, (unused). + + Returns + ------- + array_like + Variance of the approximate posterior. 
+ + """ + mean, var = self.model.predict(theta_new, noiseless=True) + sigma2_n = self.model.noise + + # Using the cdf of Skewnorm to avoid explicit Owen's T computation. + a = np.sqrt(sigma2_n) / np.sqrt(sigma2_n + 2. * var) # Skewness. + scale = np.sqrt(sigma2_n + var) + phi_skew = ss.skewnorm.cdf(self.eps, a, loc=mean, scale=scale) + phi_norm = ss.norm.cdf(self.eps, loc=mean, scale=scale) + var_p_a = phi_skew - phi_norm**2 + + val_prior = self.prior.pdf(theta_new).ravel()[:, np.newaxis] + + var_approx_posterior = val_prior**2 * var_p_a + return var_approx_posterior + + def evaluate_gradient(self, theta_new, t=None): + """Evaluate the acquisition function's gradient at the location theta_new. + + Parameters + ---------- + theta_new : array_like + Evaluation coordinates. + t : int, optional + Current iteration, (unused). + + Returns + ------- + array_like + Gradient of the variance of the approximate posterior + + """ + phi = ss.norm.cdf + mean, var = self.model.predict(theta_new, noiseless=True) + grad_mean, grad_var = self.model.predictive_gradients(theta_new) + sigma2_n = self.model.noise + scale = np.sqrt(sigma2_n + var) + + a = (self.eps - mean) / scale + b = np.sqrt(sigma2_n) / np.sqrt(sigma2_n + 2 * var) + grad_a = (-1. / scale) * grad_mean - \ + ((self.eps - mean) / (2. * (sigma2_n + var)**(1.5))) * grad_var + grad_b = (-np.sqrt(sigma2_n) / (sigma2_n + 2 * var)**(1.5)) * grad_var + + _phi_a = phi(a) + int_1 = _phi_a - _phi_a**2 + int_2 = phi(self.eps, loc=mean, scale=scale) \ + - ss.skewnorm.cdf(self.eps, b, loc=mean, scale=scale) + grad_int_1 = (1. - 2 * _phi_a) * \ + (np.exp(-.5 * (a**2)) / np.sqrt(2. * np.pi)) * grad_a + grad_int_2 = (1. / np.pi) * \ + (((np.exp(-.5 * (a**2) * (1. + b**2))) / (1. + b**2)) * grad_b + + (np.sqrt(np.pi / 2.) * np.exp(-.5 * (a**2)) * (1. - 2. * phi(a * b)) * grad_a)) + + # Obtaining the gradient prior by applying the following rule: + # (log f(x))' = f'(x)/f(x) => f'(x) = (log f(x))' * f(x) + term_prior = self.prior.pdf(theta_new).ravel()[:, np.newaxis] + grad_prior_log = self.prior.gradient_logpdf(theta_new) + term_grad_prior = term_prior * grad_prior_log + + gradient = 2. * term_prior * (int_1 - int_2) * term_grad_prior + \ + term_prior**2 * (grad_int_1 - grad_int_2) + return gradient + + +class RandMaxVar(MaxVar): + r"""The randomised maximum variance acquisition method. + + The next evaluation point is sampled from the density corresponding to the + variance of the unnormalised approximate posterior (The MaxVar acquisition function). + + \theta_{t+1} ~ q(\theta), + + where q(\theta) \propto Var(p(\theta) * p_a(\theta)) and + the unnormalised likelihood p_a is defined + using the CDF of normal distribution, \Phi, as follows: + + p_a(\theta) = + (\Phi((\epsilon - \mu_{1:t}(\theta)) / \sqrt(\v_{1:t}(\theta) + \sigma2_n))), + + where \epsilon is the ABC threshold, \mu_{1:t} and \v_{1:t} are + determined by the Gaussian process, \sigma2_n is the noise. + + + References + ---------- + [1] arXiv:1704.00520 (Järvenpää et al., 2017) + + """ + + def __init__(self, quantile_eps=.01, sampler='nuts', n_samples=50, + limit_faulty_init=10, sigma_proposals_metropolis=None, *args, **opts): + """Initialise RandMaxVar. + + Parameters + ---------- + quantile_eps : int, optional + Quantile of the observed discrepancies used in setting the ABC threshold. + sampler : string, optional + Name of the sampler (options: metropolis, nuts). + n_samples : int, optional + Length of the sampler's chain for obtaining the acquisitions. 
+ limit_faulty_init : int, optional + Limit for the iterations used to obtain the sampler's initial points. + sigma_proposals_metropolis : array_like, optional + Standard deviation proposals for tuning the metropolis sampler. + For the default settings, the sigmas are set to the 1/10 + of the parameter intervals' length. + + """ + super(RandMaxVar, self).__init__(quantile_eps, *args, **opts) + self.name = 'rand_max_var' + self.name_sampler = sampler + self._n_samples = n_samples + self._limit_faulty_init = limit_faulty_init + self._sigma_proposals_metropolis = sigma_proposals_metropolis + + def acquire(self, n, t=None): + """Acquire a batch of acquisition points. + + Parameters + ---------- + n : int + Number of acquisitions. + t : int, optional + Current iteration, (unused). + + Returns + ------- + array_like + Coordinates of the yielded acquisition points. + + """ + if n > self._n_samples: + raise ValueError("The number of acquisitions, n, has to be lower" + "than the number of the samples (%d)." + .format(self._n_samples)) + + logger.debug('Acquiring the next batch of %d values', n) + gp = self.model + + # Updating the ABC threshold. + self.eps = np.percentile(gp.Y, self.quantile_eps * 100) + + def _evaluate_gradient_logpdf(theta): + denominator = self.evaluate(theta) + if denominator == 0: + return -np.inf + pt_eval = self.evaluate_gradient(theta) / denominator + return pt_eval.ravel() + + def _evaluate_logpdf(theta): + val_pdf = self.evaluate(theta) + if val_pdf == 0: + return -np.inf + return np.log(val_pdf) + + # Obtaining the RandMaxVar acquisition. + for i in range(self._limit_faulty_init + 1): + if i > self._limit_faulty_init: + raise SystemExit("Unable to find a suitable initial point.") + + # Proposing the initial point. + theta_init = np.zeros(shape=len(gp.bounds)) + for idx_param, range_bound in enumerate(gp.bounds): + theta_init[idx_param] = self.random_state.uniform(range_bound[0], range_bound[1]) + + # Refusing to accept a faulty initial point. + if np.isinf(_evaluate_logpdf(theta_init)): + continue + + # Sampling the acquisition using the chosen sampler. + if self.name_sampler == 'metropolis': + if self._sigma_proposals_metropolis is None: + # Setting the default values of the sigma proposals to 1/10 + # of each parameters interval's length. + sigma_proposals = [] + for bound in self.model.bounds: + length_interval = bound[1] - bound[0] + sigma_proposals.append(length_interval / 10) + self._sigma_proposals_metropolis = sigma_proposals + samples = mcmc.metropolis(self._n_samples, + theta_init, + _evaluate_logpdf, + sigma_proposals=self._sigma_proposals_metropolis, + seed=self.seed) + elif self.name_sampler == 'nuts': + samples = mcmc.nuts(self._n_samples, + theta_init, + _evaluate_logpdf, + _evaluate_gradient_logpdf, + seed=self.seed) + else: + raise ValueError( + "Incompatible sampler. Please check the options in the documentation.") + + # Using the last n points of the MH chain for the acquisition batch. + batch_theta = samples[-n:, :] + break + + return batch_theta + + +class ExpIntVar(MaxVar): + r"""The Expected Integrated Variance (ExpIntVar) acquisition method. + + Essentially, we define a loss function that measures the overall uncertainty + in the unnormalised ABC posterior over the parameter space. + The value of the loss function depends on the next simulation and thus + the next evaluation location \theta^* is chosen to minimise the expected loss. 
+ + \theta_{t+1} = arg min_{\theta^* \in \Theta} L_{1:t}(\theta^*), where + + \Theta is the parameter space, and L is the expected loss function approximated as follows: + + L_{1:t}(\theta^*) \approx 2 * \sum_{i=1}^s (\omega^i * p^2(\theta^i) + * w_{1:t+1})(theta^i, \theta^*), where + + \omega^i is an importance weight, + p^2(\theta^i) is the prior squared, and + w_{1:t+1})(theta^i, \theta^*) is the expected variance of the unnormalised ABC posterior + at \theta^i after running the simulation model with parameter \theta^* + + References + ---------- + [1] arXiv:1704.00520 (Järvenpää et al., 2017) + + """ + + def __init__(self, quantile_eps=.01, integration='grid', d_grid=.2, + n_samples_imp=100, iter_imp=2, sampler='nuts', n_samples=2000, + sigma_proposals_metropolis=None, *args, **opts): + """Initialise ExpIntVar. + + Parameters + ---------- + quantile_eps : int, optional + Quantile of the observed discrepancies used in setting the discrepancy threshold. + integration : str, optional + Integration method. Options: + - grid (points are taken uniformly): more accurate yet + computationally expensive in high dimensions; + - importance (points are taken based on the importance weight): less accurate though + applicable in high dimensions. + d_grid : float, optional + Grid tightness. + n_samples_imp : int, optional + Number of importance samples. + iter_imp : int, optional + Gap between acquisition iterations in performing importance sampling. + sampler : string, optional + Sampler for generating random numbers from the proposal distribution for IS. + (Options: metropolis, nuts.) + n_samples : int, optional + Chain length for the sampler that generates the random numbers + from the proposal distribution for IS. + sigma_proposals_metropolis : array_like, optional + Standard deviation proposals for tuning the metropolis sampler. + + """ + super(ExpIntVar, self).__init__(quantile_eps, *args, **opts) + self.name = 'exp_int_var' + self.label_fn = 'Expected Loss' + self._integration = integration + self._n_samples_imp = n_samples_imp + self._iter_imp = iter_imp + + if self._integration == 'importance': + self.density_is = RandMaxVar(model=self.model, + prior=self.prior, + n_inits=self.n_inits, + seed=self.seed, + quantile_eps=self.quantile_eps, + sampler=sampler, + n_samples=n_samples, + sigma_proposals_metropolis=sigma_proposals_metropolis) + elif self._integration == 'grid': + grid_param = [slice(b[0], b[1], d_grid) for b in self.model.bounds] + self.points_int = np.mgrid[grid_param].reshape(len(self.model.bounds), -1).T + + def acquire(self, n, t): + """Acquire a batch of acquisition points. + + Parameters + ---------- + n : int + Number of acquisitions. + t : int + Current iteration. + + Returns + ------- + array_like + Coordinates of the yielded acquisition points. + + """ + logger.debug('Acquiring the next batch of %d values', n) + gp = self.model + self.sigma2_n = gp.noise + + # Updating the discrepancy threshold. + self.eps = np.percentile(gp.Y, self.quantile_eps * 100) + + # Performing the importance sampling step every self._iter_imp iterations. + if self._integration == 'importance' and t % self._iter_imp == 0: + self.points_int = self.density_is.acquire(self._n_samples_imp) + + # Obtaining the omegas_int and priors_int terms to be used in the evaluate function. 
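+        # omegas_int holds the integration weights (uniform over the grid points, or
+        # normalised importance weights), and priors_int the squared prior densities
+        # at the integration points.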
+ self.mean_int, self.var_int = gp.predict(self.points_int, noiseless=True) + self.priors_int = (self.prior.pdf(self.points_int)**2)[np.newaxis, :] + if self._integration == 'importance' and t % self._iter_imp == 0: + omegas_int_unnormalised = (1 / MaxVar.evaluate(self, self.points_int)).T + self.omegas_int = omegas_int_unnormalised / \ + np.sum(omegas_int_unnormalised, axis=1)[:, np.newaxis] + elif self._integration == 'grid': + self.omegas_int = np.empty(len(self.points_int)) + self.omegas_int.fill(1 / len(self.points_int)) + + # Initialising the attributes used in the evaluate function. + self.thetas_old = np.array(gp.X) + self._K = gp._gp.kern.K + self.K = self._K(self.thetas_old, self.thetas_old) + \ + self.sigma2_n * np.identity(self.thetas_old.shape[0]) + self.k_int_old = self._K(self.points_int, self.thetas_old).T + self.phi_int = ss.norm.cdf(self.eps, loc=self.mean_int.T, + scale=np.sqrt(self.sigma2_n + self.var_int.T)) + + # Obtaining the location where the expected loss is minimised. + # Note: The gradient is computed numerically as GPy currently does not + # directly provide the derivative computations used in Järvenpää et al., 2017. + theta_min, _ = minimize(self.evaluate, + gp.bounds, + grad=None, + prior=self.prior, + n_start_points=self.n_inits, + maxiter=self.max_opt_iters, + random_state=self.random_state) + + # Using the same location for all points in the batch. + batch_theta = np.tile(theta_min, (n, 1)) + return batch_theta + + def evaluate(self, theta_new, t=None): + """Evaluate the acquisition function at the location theta_new. + + Parameters + ---------- + theta_new : array_like + Evaluation coordinates. + t : int, optional + Current iteration, (unused). + + Returns + ------- + array_like + Expected loss's term dependent on theta_new. + + """ + gp = self.model + n_imp, n_dim = self.points_int.shape + # Alter the shape of theta_new. + if n_dim != 1 and theta_new.ndim == 1: + theta_new = theta_new[np.newaxis, :] + elif n_dim == 1 and theta_new.ndim == 1: + theta_new = theta_new[:, np.newaxis] + + # Calculate the integrand term w. + # Note: w's second term (given in Järvenpää et al., 2017) is dismissed + # because it is constant with respect to theta_new. + _, var_new = gp.predict(theta_new, noiseless=True) + k_old_new = self._K(self.thetas_old, theta_new) + k_int_new = self._K(self.points_int, theta_new).T + # Using the Cholesky factorisation to avoid computing matrix inverse. + term_chol = sl.cho_solve(sl.cho_factor(self.K), k_old_new) + cov_int = k_int_new - np.dot(self.k_int_old.T, term_chol).T + delta_var_int = cov_int**2 / (self.sigma2_n + var_new) + a = np.sqrt((self.sigma2_n + self.var_int.T - delta_var_int) / + (self.sigma2_n + self.var_int.T + delta_var_int)) + # Using the skewnorm's cdf to substitute the Owen's T function. 
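+        # (The skew-normal CDF equals Phi(x) - 2 * T(x, a), T being Owen's T function,
+        # so ss.skewnorm.cdf provides the required term without a dedicated Owen's T routine.)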
+ phi_skew_imp = ss.skewnorm.cdf(self.eps, a, loc=self.mean_int.T, + scale=np.sqrt(self.sigma2_n + self.var_int.T)) + w = ((self.phi_int - phi_skew_imp) / 2) + + loss_theta_new = 2 * np.sum(self.omegas_int * self.priors_int * w, axis=1) + return loss_theta_new + + class UniformAcquisition(AcquisitionBase): """Acquisition from uniform distribution.""" @@ -264,5 +755,5 @@ def acquire(self, n, t=None): """ bounds = np.stack(self.model.bounds) - return uniform(bounds[:, 0], bounds[:, 1] - bounds[:, 0]) \ + return ss.uniform(bounds[:, 0], bounds[:, 1] - bounds[:, 0]) \ .rvs(size=(n, self.model.input_dim), random_state=self.random_state) diff --git a/elfi/methods/bo/gpy_regression.py b/elfi/methods/bo/gpy_regression.py index 19b0a5fe..8999a45f 100644 --- a/elfi/methods/bo/gpy_regression.py +++ b/elfi/methods/bo/gpy_regression.py @@ -338,6 +338,16 @@ def Y(self): """Return output evidence.""" return self._gp.Y + @property + def noise(self): + """Return the noise.""" + return self._gp.Gaussian_noise.variance[0] + + @property + def instance(self): + """Return the gp instance.""" + return self._gp + def copy(self): """Return a copy of current instance.""" kopy = copy.copy(self) diff --git a/elfi/methods/bo/utils.py b/elfi/methods/bo/utils.py index 3c207ea0..281c8f80 100644 --- a/elfi/methods/bo/utils.py +++ b/elfi/methods/bo/utils.py @@ -1,7 +1,8 @@ """Utilities for Bayesian optimization.""" import numpy as np -from scipy.optimize import differential_evolution, fmin_l_bfgs_b +import scipy.optimize +from scipy.optimize import differential_evolution # TODO: remove or combine to minimize @@ -81,20 +82,19 @@ def minimize(fun, for i in range(ndim): start_points[:, i] = np.clip(start_points[:, i], *bounds[i]) + # Run the optimisation from each initialization point. locs = [] vals = np.empty(n_start_points) - - # Run optimization from each initialization point for i in range(n_start_points): - if grad is not None: - result = fmin_l_bfgs_b( - fun, start_points[i, :], fprime=grad, bounds=bounds, maxiter=maxiter) - else: - result = fmin_l_bfgs_b( - fun, start_points[i, :], approx_grad=True, bounds=bounds, maxiter=maxiter) - locs.append(result[0]) - vals[i] = result[1] - - # Return the optimal case + result = scipy.optimize.minimize(fun, start_points[i, :], + method='L-BFGS-B', jac=grad, bounds=bounds) + locs.append(result['x']) + vals[i] = result['fun'] + + # Return the optimal case. ind_min = np.argmin(vals) + locs_out = locs[ind_min] + for i in range(ndim): + locs_out[i] = np.clip(locs_out[i], *bounds[i]) + return locs[ind_min], vals[ind_min] diff --git a/elfi/methods/diagnostics.py b/elfi/methods/diagnostics.py new file mode 100644 index 00000000..be5bfeef --- /dev/null +++ b/elfi/methods/diagnostics.py @@ -0,0 +1,289 @@ +"""Methods for ABC diagnostics.""" + +import logging +from itertools import combinations + +import numpy as np +from scipy.spatial import cKDTree +from scipy.special import digamma, gamma + +import elfi + +logger = logging.getLogger(__name__) + + +class TwoStageSelection: + """Perform the summary-statistics selection proposed by Nunes and Balding (2010). + + The user can provide a list of summary statistics as list_ss, and let ELFI to combine them, + or provide some already combined summary statistics as prepared_ss. + + The rationale of the Two Stage procedure procedure is the following: + + - First, the module computes or accepts the combinations of the candidate summary statistics. + - In Stage 1, each summary-statistics combination is evaluated using the + Minimum Entropy algorithm. 
+ - In Stage 2, the minimum-entropy combination is selected, + and the 'closest' datasets are identified. + - Further in Stage 2, for each summary-statistics combination, + the mean root sum of squared errors (MRSSE) is calculated over all 'closest datasets', + and the minimum-MRSSE combination is chosen as the one with the optimal performance. + + References + ---------- + [1] Nunes, M. A., & Balding, D. J. (2010). + On optimal selection of summary statistics for approximate Bayesian computation. + Statistical applications in genetics and molecular biology, 9(1). + [2] Blum, M. G., Nunes, M. A., Prangle, D., & Sisson, S. A. (2013). + A comparative review of dimension reduction methods in approximate Bayesian computation. + Statistical Science, 28(2), 189-208. + + """ + + def __init__(self, simulator, fn_distance, list_ss=None, prepared_ss=None, + max_cardinality=4, seed=0): + """Initialise the summary-statistics selection for the Two Stage Procedure. + + Parameters + ---------- + simulator : elfi.Node + Node (often elfi.Simulator) for which the summary statistics will be applied. + The node is the final node of a coherent ElfiModel (i.e. it has no child nodes). + fn_distance : str or callable function + Distance metric, consult the elfi.Distance documentation for calling as a string. + list_ss : List of callable functions, optional + List of candidate summary statistics. + prepared_ss : List of lists of callable functions, optional + List of prepared combinations of candidate summary statistics. + No other combinations will be evaluated. + max_cardinality : int, optional + Maximum cardinality of a candidate summary-statistics combination. + seed : int, optional + + """ + if list_ss is None and prepared_ss is None: + raise ValueError('No summary statistics to assess.') + + self.simulator = simulator + self.fn_distance = fn_distance + self.seed = seed + if prepared_ss is not None: + self.ss_candidates = prepared_ss + else: + self.ss_candidates = self._combine_ss(list_ss, max_cardinality=max_cardinality) + # Initialising an output pool as the rejection sampling will be used several times. + self.pool = elfi.OutputPool(simulator.name) + + def _combine_ss(self, list_ss, max_cardinality): + """Create all combinations of the initialised summary statistics up till the maximum cardinality. + + Parameters + ---------- + list_ss : List of callable functions + List of candidate summary statistics. + max_cardinality : int + Maximum cardinality of a candidate summary-statistics combination. + + Returns + ------- + List + Combinations of candidate summary statistics. + + """ + if max_cardinality > len(list_ss): + max_cardinality = len(list_ss) + + # Combine the candidate summary statistics. + combinations_ss = [] + for i in range(max_cardinality): + for combination in combinations(list_ss, i + 1): + combinations_ss.append(combination) + return combinations_ss + + def run(self, n_sim, n_acc=None, n_closest=None, batch_size=1, k=4): + """Run the Two Stage Procedure for identifying relevant summary statistics. + + Parameters + ---------- + n_sim : int + Number of the total ABC-rejection simulations. + n_acc : int, optional + Number of the accepted ABC-rejection simulations. + n_closest : int, optional + Number of the 'closest' datasets + (i.e., the closest n simulation datasets w.r.t the observations). + batch_size : int, optional + Number of samples per batch. 
+ k : int, optional + Parameter for the kth-nearest-neighbour search performed in the minimum-entropy step + (in Nunes & Balding, 2010 it is fixed to 4). + + Returns + ------- + array_like + Summary-statistics combination showing the optimal performance. + + """ + # Setting the default value of n_acc to the .01 quantile of n_sim, + # and n_closest to the .01 quantile of n_acc as in Nunes and Balding (2010). + if n_acc is None: + n_acc = int(n_sim / 100) + if n_closest is None: + n_closest = int(n_acc / 100) + if n_sim < n_acc or n_acc < n_closest or n_closest == 0: + raise ValueError("The number of simulations is too small.") + + # Find the summary-statistics combination with the minimum entropy, and + # preserve the parameters (thetas) corresponding to the `closest' datasets. + thetas = {} + E_me = np.inf + names_ss_me = [] + for set_ss in self.ss_candidates: + names_ss = [ss.__name__ for ss in set_ss] + thetas_ss = self._obtain_accepted_thetas(set_ss, n_sim, n_acc, batch_size) + thetas[set_ss] = thetas_ss + E_ss = self._calc_entropy(thetas_ss, n_acc, k) + # If equal, dismiss the combination which contains uninformative summary statistics. + if (E_ss == E_me and (len(names_ss_me) > len(names_ss))) or E_ss < E_me: + E_me = E_ss + names_ss_me = names_ss + thetas_closest = thetas_ss[:n_closest] + logger.info('Combination %s shows the entropy of %f' % (names_ss, E_ss)) + # Note: entropy is in the log space (negative values allowed). + logger.info('\nThe minimum entropy of %f was found in %s.\n' % (E_me, names_ss_me)) + + # Find the summary-statistics combination with + # the minimum mean root sum of squared error (MRSSE). + MRSSE_min = np.inf + names_ss_MRSSE = [] + for set_ss in self.ss_candidates: + names_ss = [ss.__name__ for ss in set_ss] + MRSSE_ss = self._calc_MRSSE(set_ss, thetas_closest, thetas[set_ss]) + # If equal, dismiss the combination which contains uninformative summary statistics. + if (MRSSE_ss == MRSSE_min and (len(names_ss_MRSSE) > len(names_ss))) \ + or MRSSE_ss < MRSSE_min: + MRSSE_min = MRSSE_ss + names_ss_MRSSE = names_ss + set_ss_2stage = set_ss + logger.info('Combination %s shows the MRSSE of %f' % (names_ss, MRSSE_ss)) + logger.info('\nThe minimum MRSSE of %f was found in %s.' % (MRSSE_min, names_ss_MRSSE)) + return set_ss_2stage + + def _obtain_accepted_thetas(self, set_ss, n_sim, n_acc, batch_size): + """Perform the ABC-rejection sampling and identify `closest' parameters. + + The sampling is performed using the initialised simulator. + + Parameters + ---------- + set_ss : List + Summary-statistics combination to be used in the rejection sampling. + n_sim : int + Number of the iterations of the rejection sampling. + n_acc : int + Number of the accepted parameters. + batch_size : int + Number of samples per batch. + + Returns + ------- + array_like + Accepted parameters. + + """ + # Initialise the distance function. + m = self.simulator.model.copy() + list_ss = [] + for ss in set_ss: + list_ss.append(elfi.Summary(ss, m[self.simulator.name], model=m)) + if isinstance(self.fn_distance, str): + d = elfi.Distance(self.fn_distance, *list_ss, model=m) + else: + d = elfi.Discrepancy(self.fn_distance, *list_ss, model=m) + + # Run the simulations. + # TODO: include different distance functions in the summary-statistics combinations. + sampler_rejection = elfi.Rejection(d, batch_size=batch_size, + seed=self.seed, pool=self.pool) + result = sampler_rejection.sample(n_acc, n_sim=n_sim) + + # Extract the accepted parameters. 
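+        # Note: samples_array is expected to hold the accepted parameters ordered by
+        # increasing discrepancy; run() relies on this ordering when it takes the first
+        # n_closest rows as the parameters of the `closest' datasets.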
+ thetas_acc = result.samples_array + return thetas_acc + + def _calc_entropy(self, thetas_ss, n_acc, k): + """Calculate the entropy as described in Nunes & Balding, 2010. + + E = log( pi^(q/2) / gamma(q/2+1) ) - digamma(k) + log(n) + + q/n * sum_{i=1}^n( log(R_{i, k}) ), where + + R_{i, k} is the Euclidean distance from the parameter theta_i to + its kth nearest neighbour; + q is the dimensionality of the parameter; and + n is the number of the accepted parameters n_acc in the rejection sampling. + + Parameters + ---------- + thetas_ss : array_like + Parameters accepted upon the rejection sampling using + the summary-statistics combination ss. + n_acc : int + Number of the accepted parameters. + k : int + Nearest neighbour to be searched. + + Returns + ------- + float + Entropy. + + """ + q = thetas_ss.shape[1] + + # Calculate the distance to the kth nearest neighbour across all accepted parameters. + searcher_knn = cKDTree(thetas_ss) + sum_log_dist_knn = 0 + for theta_ss in thetas_ss: + dist_knn = searcher_knn.query(theta_ss, k=k)[0][-1] + sum_log_dist_knn += np.log(dist_knn) + + # Calculate the entropy. + E = np.log(np.pi**(q / 2) / gamma((q / 2) + 1)) - digamma(k) \ + + np.log(n_acc) + (q / n_acc) * sum_log_dist_knn + return E + + def _calc_MRSSE(self, set_ss, thetas_obs, thetas_sim): + """Calculate the mean root of squared error (MRSSE) as described in Nunes & Balding, 2010. + + MRSSE = 1/n * sum_{j=1}^n( RSSE(j) ), + + RSSE = 1/m * sum_{i=1}^m( theta_i - theta_true ), where + + n is the number of the `closest' datasets identified using + the summary-statistics combination corresponding to the minimum entropy; + m is the number of the accepted parameters in the rejection sampling for set_ss; + theta_i is an instance of the parameters corresponding to set_ss; and + theta_true is the parameters corresponding to a `closest' dataset. + + Parameters + ---------- + set_ss : List + Summary-statistics combination used in the rejection sampling. + thetas_obs : array_like + List of parameters corresponding to the `closest' datasets. + thetas_sim : array_like + Parameters corresponding to set_ss. + + Returns + ------- + float + Mean root of squared error. + + """ + RSSE_total = 0 + for theta_obs in thetas_obs: + SSE = np.linalg.norm(thetas_sim - theta_obs)**2 + RSSE = np.sqrt(SSE) + RSSE_total += RSSE + MRSSE = RSSE_total / len(thetas_obs) + return MRSSE diff --git a/elfi/model/elfi_model.py b/elfi/model/elfi_model.py index 91c8a839..0b6b1d1e 100644 --- a/elfi/model/elfi_model.py +++ b/elfi/model/elfi_model.py @@ -968,7 +968,7 @@ def __init__(self, discrepancy, *parents, **kwargs): class Distance(Discrepancy): """A convenience class for the discrepancy node.""" - def __init__(self, distance, *summaries, p=None, w=None, V=None, VI=None, **kwargs): + def __init__(self, distance, *summaries, **kwargs): """Initialize a distance node of an ELFI graph. This class contains many common distance implementations through scipy. @@ -983,18 +983,12 @@ def __init__(self, distance, *summaries, p=None, w=None, V=None, VI=None, **kwar that contains the observed values (summaries). The callable should return a vector of distances between the simulated summaries and the observed summaries. - summaries - summary nodes of the model - p : double, optional - The p-norm to apply Only for distance Minkowski (`'minkowski'`), weighted - and unweighted. Default: 2. - w : ndarray, optional - The weight vector. Only for weighted Minkowski (`'wminkowski'`). Mandatory. - V : ndarray, optional - The variance vector. 
Only for standardized Euclidean (`'seuclidean'`). - Mandatory. - VI : ndarray, optional - The inverse of the covariance matrix. Only for Mahalanobis. Mandatory. + *summaries + Summary nodes of the model. + **kwargs + Additional parameters may be required depending on the chosen distance. + See the scipy documentation. (The support is not exhaustive.) + ELFI-related kwargs are passed on to elfi.Discrepancy. Examples -------- @@ -1021,16 +1015,23 @@ def __init__(self, distance, *summaries, p=None, w=None, V=None, VI=None, **kwar raise ValueError("This node requires that at least one parent is specified.") if isinstance(distance, str): - if distance == 'wminkowski' and w is None: + cdist_kwargs = dict(metric=distance) + if distance == 'wminkowski' and 'w' not in kwargs.keys(): raise ValueError('Parameter w must be specified for distance=wminkowski.') - if distance == 'seuclidean' and V is None: + elif distance == 'seuclidean' and 'V' not in kwargs.keys(): raise ValueError('Parameter V must be specified for distance=seuclidean.') - if distance == 'mahalanobis' and VI is None: + elif distance == 'mahalanobis' and 'VI' not in kwargs.keys(): raise ValueError('Parameter VI must be specified for distance=mahalanobis.') - cdist_kwargs = dict(metric=distance, p=p, w=w, V=V, VI=VI) + + # extract appropriate keyword arguments (depends on distance, not exhaustive!) + for key in ['p', 'w', 'V', 'VI']: + if key in kwargs.keys(): + cdist_kwargs[key] = kwargs.pop(key) + dist_fn = partial(scipy.spatial.distance.cdist, **cdist_kwargs) else: dist_fn = distance + discrepancy = partial(distance_as_discrepancy, dist_fn) super(Distance, self).__init__(discrepancy, *summaries, **kwargs) # Store the original passed distance diff --git a/elfi/visualization/visualization.py b/elfi/visualization/visualization.py index d3736e5a..4ae3dc90 100644 --- a/elfi/visualization/visualization.py +++ b/elfi/visualization/visualization.py @@ -146,7 +146,7 @@ def plot_marginals(samples, selector=None, bins=20, axes=None, **kwargs): samples = _limit_params(samples, selector) ncols = kwargs.pop('ncols', 5) kwargs['sharey'] = kwargs.get('sharey', True) - shape = (max(1, len(samples) // ncols), min(len(samples), ncols)) + shape = (max(1, round(len(samples) / ncols + 0.5)), min(len(samples), ncols)) axes, kwargs = _create_axes(axes, shape, **kwargs) axes = axes.ravel() for ii, k in enumerate(samples.keys()): diff --git a/tests/conftest.py b/tests/conftest.py index 7213e864..c5587f36 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,9 @@ import elfi.clients.native as native import elfi.examples.gauss import elfi.examples.ma2 +from elfi.methods.bo.gpy_regression import GPyRegression +from elfi.methods.bo.acquisition import ExpIntVar, MaxVar, RandMaxVar +from elfi.methods.utils import ModelPrior elfi.clients.native.set_as_default() @@ -92,6 +95,91 @@ def ma2(): return elfi.examples.ma2.get_model() +@pytest.fixture() +def acq_maxvar(): + """Initialise a MaxVar fixture. + + Returns + ------- + MaxVar + Acquisition method. + + """ + gp, prior = _get_dependencies_acq_fn() + + # Initialising the acquisition method. + method_acq = MaxVar(model=gp, prior=prior) + return method_acq + + +@pytest.fixture() +def acq_randmaxvar(): + """Initialise a RandMaxVar fixture. + + Returns + ------- + RandMaxVar + Acquisition method. + + """ + gp, prior = _get_dependencies_acq_fn() + + # Initialising the acquisition method. 
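+    # RandMaxVar treats the (unnormalised) MaxVar criterion as a density and draws
+    # the acquisition from it with an MCMC sampler (NUTS by default).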
+ method_acq = RandMaxVar(model=gp, prior=prior) + return method_acq + + +@pytest.fixture() +def acq_expintvar(): + """Initialise an ExpIntVar fixture. + + Returns + ------- + ExpIntVar + Acquisition method. + + """ + gp, prior = _get_dependencies_acq_fn() + + # Initialising the acquisition method. + method_acq = ExpIntVar(model=gp, prior=prior) + return method_acq + + +def _get_dependencies_acq_fn(): + """Provide the requirements for the MaxVar-based acquisition function initialisation. + + Returns + ------- + (GPy.model.GPRegression, elfi.methods.utils.ModelPrior) + Tuple containing a fit gp and a prior. + + """ + mean = [4, 4] + cov_matrix = [[1, .5], [.5, 1]] + names_param = ['mu_0', 'mu_1'] + eps_prior = 5 # The prior's range indicator used in the Gaussian noise model. + bounds_param = {'mu_0': (mean[0] - eps_prior, mean[0] + eps_prior), + 'mu_1': (mean[1] - eps_prior, mean[1] + eps_prior)} + + # Initialising the prior. + gm_2d = elfi.examples.gauss.get_model(true_params=mean, nd_mean=True, cov_matrix=cov_matrix) + prior = ModelPrior(gm_2d) + + # Generating the coordinates and the values of the fitting data. + n_pts_fit = 10 + x1 = np.random.uniform(*bounds_param['mu_0'], n_pts_fit) + x2 = np.random.uniform(*bounds_param['mu_1'], n_pts_fit) + x = np.column_stack((x1, x2)) + y = np.random.rand(n_pts_fit) + + # Fitting the gp with the generated points. + gp = GPyRegression(names_param, bounds=bounds_param) + gp.update(x, y) + + return gp, prior + + def sleeper(sec, batch_size, random_state): secs = np.zeros(batch_size) for i, s in enumerate(sec): diff --git a/tests/unit/test_bo.py b/tests/unit/test_bo.py index 1cf006e6..eb0cd80e 100644 --- a/tests/unit/test_bo.py +++ b/tests/unit/test_bo.py @@ -122,3 +122,105 @@ def test_acquisition(): assert new.shape == (n2, n_params) assert np.all((new[:, 0] >= bounds['a'][0]) & (new[:, 0] <= bounds['a'][1])) assert np.all((new[:, 1] >= bounds['b'][0]) & (new[:, 1] <= bounds['b'][1])) + + +class Test_MaxVar: + """Run a collection of tests for the MaxVar acquisition.""" + + def test_acq_bounds(self, acq_maxvar): + """Check if the acquisition is performed within the bounds. + + Parameters + ---------- + acq_maxvar : MaxVar + Acquisition method. + + """ + bounds = acq_maxvar.model.bounds + n_dim_fixture = len(acq_maxvar.model.bounds) + batch_size = 2 + n_it = 2 + + # Acquiring points. + for it in range(n_it): + batch_theta = acq_maxvar.acquire(n=batch_size, t=it) + + # Checking if the acquired points are within the bounds. + for dim in range(n_dim_fixture): + assert np.all((batch_theta[:, dim] >= bounds[dim][0]) & + (batch_theta[:, dim] <= bounds[dim][1])) + + def test_gradient(self, acq_maxvar): + """Test the gradient function using GPy's GradientChecker. + + Parameters + ---------- + acq_maxvar : MaxVar + Acquisition method. + + """ + from GPy.models.gradient_checker import GradientChecker + n_pts_test = 100 + n_dim_fixture = len(acq_maxvar.model.bounds) + + checker_grad = GradientChecker(acq_maxvar.evaluate, + acq_maxvar.evaluate_gradient, + np.random.randn(n_pts_test, n_dim_fixture)) + + # The tolerance corresponds to the allowed deviation from the unity of + # the ratio between analytical and numerical gradients. + assert checker_grad.checkgrad(tolerance=1e-4) + + +class Test_RandMaxVar: + """Run a collection of tests for the RandMaxVar acquisition.""" + + def test_acq_bounds(self, acq_randmaxvar): + """Check if the acquisition is performed within the bounds. + + Parameters + ---------- + acq_randmaxvar : RandMaxVar + Acquisition method. 
+ + """ + bounds = acq_randmaxvar.model.bounds + n_dim_fixture = len(acq_randmaxvar.model.bounds) + batch_size = 2 + n_it = 2 + + # Acquiring points. + for it in range(n_it): + batch_theta = acq_randmaxvar.acquire(n=batch_size, t=it) + + # Checking if the acquired points are within the bounds. + for dim in range(n_dim_fixture): + assert np.all((batch_theta[:, dim] >= bounds[dim][0]) & + (batch_theta[:, dim] <= bounds[dim][1])) + + +class Test_ExpIntVar: + """Run a collection of tests for the ExpIntVar acquisition.""" + + def test_acq_bounds(self, acq_expintvar): + """Check if the acquisition is performed within the bounds. + + Parameters + ---------- + acq_expintvar : ExpIntVar + Acquisition method. + + """ + bounds = acq_expintvar.model.bounds + n_dim_fixture = len(acq_expintvar.model.bounds) + batch_size = 2 + n_it = 2 + + # Acquiring points. + for it in range(n_it): + batch_theta = acq_expintvar.acquire(n=batch_size, t=it) + + # Checking if the acquired points are within the bounds. + for dim in range(n_dim_fixture): + assert np.all((batch_theta[:, dim] >= bounds[dim][0]) & + (batch_theta[:, dim] <= bounds[dim][1])) diff --git a/tests/unit/test_diagnostics.py b/tests/unit/test_diagnostics.py new file mode 100644 index 00000000..4289fa38 --- /dev/null +++ b/tests/unit/test_diagnostics.py @@ -0,0 +1,57 @@ +"""Tests for ABC diagnostics.""" +from functools import partial + +import numpy as np + +import elfi +import elfi.examples.gauss as Gauss +import elfi.examples.ma2 as MA2 +from elfi.methods.diagnostics import TwoStageSelection + + +class TestTwoStageProcedure: + """Tests for the Two-Stage Procedure (selection of summary statistics).""" + + @classmethod + def setup_class(cls): + """Refresh ELFI upon initialising the test class.""" + elfi.new_model() + + def teardown_method(self, method): + """Refresh ELFI after the execution of the test class's each method.""" + elfi.new_model() + + def test_ma2(self, seed=0): + """Identifying the optimal summary statistics combination following the MA2 model. + + Parameters + ---------- + seed : int, optional + + """ + # Defining summary statistics. + ss_mean = Gauss.ss_mean + ss_ac_lag1 = partial(MA2.autocov, lag=1) + ss_ac_lag1.__name__ = 'ac_lag1' + ss_ac_lag2 = partial(MA2.autocov, lag=2) + ss_ac_lag2.__name__ = 'ac_lag2' + + list_ss = [ss_ac_lag1, ss_ac_lag2, ss_mean] + + # Initialising the simulator. + prior_t1 = elfi.Prior(MA2.CustomPrior1, 2, name='prior_t1') + prior_t2 = elfi.Prior(MA2.CustomPrior2, prior_t1, 1, name='prior_t2') + + t1_true = .6 + t2_true = .2 + fn_simulator = MA2.MA2 + y_obs = fn_simulator(t1_true, t2_true, random_state=np.random.RandomState(seed)) + simulator = elfi.Simulator(fn_simulator, prior_t1, prior_t2, observed=y_obs) + + # Identifying the optimal summary statistics based on the Two-Stage procedure. 
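+        # With n_acc and n_closest left at their defaults, run() accepts
+        # n_sim / 100 = 1000 parameter sets and uses the 10 closest datasets
+        # in the MRSSE stage.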
+ diagnostics = TwoStageSelection(simulator, 'euclidean', list_ss=list_ss, seed=seed) + set_ss_2stage = diagnostics.run(n_sim=100000, batch_size=10000) + + assert ss_ac_lag1 in set_ss_2stage + assert ss_ac_lag2 in set_ss_2stage + assert ss_mean not in set_ss_2stage diff --git a/tests/unit/test_examples.py b/tests/unit/test_examples.py index 6e0f09bb..f12c8a67 100644 --- a/tests/unit/test_examples.py +++ b/tests/unit/test_examples.py @@ -1,13 +1,15 @@ +"""Simple running tests for examples.""" + import os import pytest import elfi -from elfi.examples import bdm, gauss, ricker, gnk, bignk +from elfi.examples import bdm, bignk, gauss, gnk, lotka_volterra, ricker def test_bdm(): - """Currently only works in unix-like systems and with a cloned repository""" + """Currently only works in unix-like systems and with a cloned repository.""" cpp_path = bdm.get_sources_path() do_cleanup = False @@ -80,3 +82,9 @@ def test_bignk(stats_summary=['ss_octile']): m = bignk.get_model() rej = elfi.Rejection(m, m['d'], batch_size=10) rej.sample(20) + + +def test_Lotka_Volterra(): + m = lotka_volterra.get_model() + rej = elfi.Rejection(m, m['d'], batch_size=10) + rej.sample(20)