From 147206c3155d0066a8bfb9e1e68d4e4a9e0c0359 Mon Sep 17 00:00:00 2001 From: Henri Pesonen Date: Mon, 29 Mar 2021 15:20:49 +0200 Subject: [PATCH] V080 (#369) * Preparing release v080. * Modify rtd-documents for new methods. * Update CHANGELOG.rst --- CHANGELOG.rst | 2 + Makefile | 16 +- README.md | 2 +- docs/api.rst | 8 + docs/conf.py | 3 +- docs/index.rst | 6 +- docs/quickstart.rst | 4 +- docs/usage/BOLFI.rst | 12 +- docs/usage/adaptive_distance.rst | 410 ++++++++++++++++++++ docs/usage/adaptive_threshold_selection.rst | 186 +++++++++ docs/usage/external.rst | 10 +- docs/usage/parallelization.rst | 4 +- docs/usage/tutorial.rst | 24 +- elfi/__init__.py | 2 +- 14 files changed, 653 insertions(+), 36 deletions(-) create mode 100644 docs/usage/adaptive_distance.rst create mode 100644 docs/usage/adaptive_threshold_selection.rst diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0e7064d6..5ec2d321 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,8 @@ Changelog ========= +0.8.0 (2021-03-29) +------------------ - Merge adaptive distance ABC-SMC and ABC-SMC functionalities - Split `DensityRatioEstimation` from utils.py into separate file - Refactor parameter_inferency.py into methodtype-wise individual files diff --git a/Makefile b/Makefile index aad8137d..679ebe2e 100644 --- a/Makefile +++ b/Makefile @@ -72,23 +72,29 @@ docs: ## generate Sphinx HTML documentation, including API docs $(MAKE) -C docs html # $(BROWSER) docs/_build/html/index.html -CONTENT_URL := http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/ +CONTENT_URL := https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/ notebook-docs: ## Conver notebooks to rst docs. Assumes you have them in `notebooks` directory. jupyter nbconvert --to rst ../notebooks/quickstart.ipynb --output-dir docs sed -i '' 's|\(quickstart_files/quickstart.*\.\)|'${CONTENT_URL}'\1|g' docs/quickstart.rst jupyter nbconvert --to rst ../notebooks/tutorial.ipynb --output-dir docs/usage - sed -i '' 's|\(tutorial_files/tutorial.*\.\)|'${CONTENT_URL}usage/'\1|g' docs/usage/tutorial.rst + sed -i '' 's|\(tutorial_files/tutorial.*\.\)|'${CONTENT_URL}'\1|g' docs/usage/tutorial.rst + + jupyter nbconvert --to rst ../notebooks/adaptive_distance.ipynb --output-dir docs/usage + sed -i '' 's|\(adaptive_distance_files/adaptive_distance.*\.\)|'${CONTENT_URL}'\1|g' docs/usage/adaptive_distance.rst + + jupyter nbconvert --to rst ../notebooks/adaptive_threshold_selection.ipynb --output-dir docs/usage + sed -i '' 's|\(adaptive_threshold_selection_files/adaptive_threshold_selection.*\.\)|'${CONTENT_URL}'\1|g' docs/usage/adaptive_threshold_selection.rst jupyter nbconvert --to rst ../notebooks/BOLFI.ipynb --output-dir docs/usage - sed -i '' 's|\(BOLFI_files/BOLFI.*\.\)|'${CONTENT_URL}usage/'\1|g' docs/usage/BOLFI.rst + sed -i '' 's|\(BOLFI_files/BOLFI.*\.\)|'${CONTENT_URL}'\1|g' docs/usage/BOLFI.rst jupyter nbconvert --to rst ../notebooks/parallelization.ipynb --output-dir docs/usage - sed -i '' 's|\(parallelization_files/parallelization.*\.\)|'${CONTENT_URL}usage/'\1|g' docs/usage/parallelization.rst + sed -i '' 's|\(parallelization_files/parallelization.*\.\)|'${CONTENT_URL}'\1|g' docs/usage/parallelization.rst jupyter nbconvert --to rst ../notebooks/non_python_operations.ipynb --output-dir docs/usage --output=external - sed -i '' 's|\(external_files/external.*\.\)|'${CONTENT_URL}usage/'\1|g' docs/usage/external.rst + sed -i '' 's|\(external_files/external.*\.\)|'${CONTENT_URL}'\1|g' docs/usage/external.rst # release: clean ## package and upload a release # python setup.py sdist upload diff --git a/README.md b/README.md index 53b5efa3..6a89da87 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -**Version 0.7.7 released!** See the [CHANGELOG](CHANGELOG.rst) and [notebooks](https://github.com/elfi-dev/notebooks). +**Version 0.8.0 released!** See the [CHANGELOG](CHANGELOG.rst) and [notebooks](https://github.com/elfi-dev/notebooks). ELFI - Engine for Likelihood-Free Inference =========================================== diff --git a/docs/api.rst b/docs/api.rst index 357a1ae6..fbcb5554 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -50,6 +50,7 @@ Below is a list of inference methods included in ELFI. elfi.Rejection elfi.SMC elfi.AdaptiveDistanceSMC + elfi.AdaptiveThresholdSMC elfi.BayesianOptimization elfi.BOLFI elfi.ROMC @@ -205,6 +206,10 @@ Inference API classes :members: :inherited-members: +.. autoclass:: elfi.AdaptiveThresholdSMC + :members: + :inherited-members: + .. autoclass:: elfi.BayesianOptimization :members: :inherited-members: @@ -238,6 +243,9 @@ Inference API classes :members: :inherited-members: +.. autoclass:: RomcSample + :members: + :inherited-members: **Post-processing** diff --git a/docs/conf.py b/docs/conf.py index 402ca86f..fff11d07 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,7 +32,8 @@ def __getattr__(cls, name): 'distributed', 'distributed.client', 'graphviz', 'matplotlib', 'sobol_seq', 'GPy', 'dask.delayed', 'scipy.linalg', 'scipy.optimize', 'scipy.stats', 'scipy.spatial', 'scipy.sparse', 'scipy.special', 'matplotlib.pyplot', 'numpy.random', 'networkx', - 'ipyparallel', 'numpy.lib', 'numpy.lib.format', 'sklearn.linear_model' + 'ipyparallel', 'numpy.lib', 'numpy.lib.format', 'sklearn.linear_model', + 'sklearn.pipeline', 'sklearn.preprocessing' ] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) diff --git a/docs/index.rst b/docs/index.rst index 0ab31897..34c377b3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,11 +23,13 @@ Currently implemented LFI methods: - ABC rejection sampler - Sequential Monte Carlo ABC sampler -- SMC-ABC sampler with `adaptive distance`_ +- ABC-SMC sampler with `adaptive distance`_ +- ABC-SMC sampler with `adaptive threshold selection`_ - Bayesian Optimization for Likelihood-Free Inference (BOLFI_) framework - Robust Optimization Monte Carlo (ROMC_) framework .. _adaptive distance: https://projecteuclid.org/euclid.ba/1460641065 +.. _adaptive threshold selection: https://projecteuclid.org/journals/bayesian-analysis/advance-publication/Adaptive-Approximate-Bayesian-Computation-Tolerance-Selection/10.1214/20-BA1211.full .. _BOLFI: http://jmlr.org/papers/v17/15-017.html .. _ROMC: http://proceedings.mlr.press/v108/ikonomov20a.html @@ -56,6 +58,8 @@ Additionally, ELFI integrates tools for visualization, model comparison, diagnos :caption: Usage usage/tutorial + usage/adaptive_distance + usage/adaptive_threshold_selection usage/parallelization usage/BOLFI usage/external diff --git a/docs/quickstart.rst b/docs/quickstart.rst index f643f26c..2de9f24f 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -96,7 +96,7 @@ visualize the model: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/quickstart_files/quickstart_11_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/quickstart_files/quickstart_11_0.svg @@ -134,5 +134,5 @@ Let’s plot also the marginal distributions for the parameters: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/quickstart_files/quickstart_16_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/quickstart_files/quickstart_16_0.png diff --git a/docs/usage/BOLFI.rst b/docs/usage/BOLFI.rst index 783acdbf..df881533 100644 --- a/docs/usage/BOLFI.rst +++ b/docs/usage/BOLFI.rst @@ -51,7 +51,7 @@ the basic tutorial, and load it from ready-made examples: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/BOLFI_files/BOLFI_5_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/BOLFI_files/BOLFI_5_0.svg @@ -166,7 +166,7 @@ investigated further: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/BOLFI_files/BOLFI_15_1.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/BOLFI_files/BOLFI_15_1.png It may be useful to see the acquired parameter values and the resulting @@ -178,7 +178,7 @@ discrepancies: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/BOLFI_files/BOLFI_17_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/BOLFI_files/BOLFI_17_0.png There could be an unnecessarily high number of points at parameter @@ -212,7 +212,7 @@ triangle): -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/BOLFI_files/BOLFI_23_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/BOLFI_files/BOLFI_23_0.png Sampling @@ -300,7 +300,7 @@ convenience methods: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/BOLFI_files/BOLFI_29_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/BOLFI_files/BOLFI_29_0.png The black vertical lines indicate the end of warmup, which by default is @@ -312,5 +312,5 @@ half of the number of iterations. -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/BOLFI_files/BOLFI_31_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/BOLFI_files/BOLFI_31_0.png diff --git a/docs/usage/adaptive_distance.rst b/docs/usage/adaptive_distance.rst new file mode 100644 index 00000000..542b9344 --- /dev/null +++ b/docs/usage/adaptive_distance.rst @@ -0,0 +1,410 @@ +Adaptive distance +================= + +`ABC `__ +provides means to sample an approximate posterior distribution over +unknown parameters based on comparison between observed and simulated +data. This comparison is often based on distance between features that +summarise the data and are informative about the parameter values. + +Here we assume that the summaries calculated based on observed and +simulated data are compared based on weighted distance with weight +:math:`w_i=1/\sigma_i` calculated based on their standard deviation +:math:`\sigma_i`. This ensures that the selected summaries to have an +equal contribution in the distance between observed and simulated data. + +This notebook studies `adaptive +distance `__ +`SMC-ABC `__ +where :math:`\sigma_i` and :math:`w_i` are recalculated between SMC +iterations as proposed in [`1 <#Reference>`__]. + +.. code:: ipython3 + + import numpy as np + import scipy.stats as ss + import matplotlib.pyplot as plt + %matplotlib inline + +.. code:: ipython3 + + import elfi + +Example 1: +---------- + +Assume we have an unknown parameter with prior distribution +:math:`\theta\sim U(0,50)` and two simulator outputs +:math:`S_1\sim N(\theta, 1)` and :math:`S_2\sim N(\theta, 100)` whose +observed values are 20. + +.. code:: ipython3 + + def simulator(mu, batch_size=1, random_state=None): + + batches_mu = np.asarray(mu).reshape((-1,1)) + + obs_1 = ss.norm.rvs(loc=batches_mu, scale=1, random_state=random_state).reshape((-1,1)) + obs_2 = ss.norm.rvs(loc=batches_mu, scale=100, random_state=random_state).reshape((-1,1)) + + return np.hstack((obs_1, obs_2)) + +.. code:: ipython3 + + observed_data = np.array([20,20])[None,:] + +Here the simulator outputs are both informative about the unknown model +parameter, but :math:`S_2` has more observation noise than :math:`S_1`. +We do not calculate separate summaries in this example, but compare +observed and simulated data based on these two variables. + +Euclidean distance between observed and simulated outputs or summaries +can be used to find parameter values that could produce the observed +data. Here we describe dependencies between the unknown parameter value +and observed distances as an ELFI model ``m`` and sample the approximate +posterior distribution with the `rejection +sampler `__. + +.. code:: ipython3 + + m = elfi.new_model() + theta = elfi.Prior(ss.uniform, 0, 50, model=m) + sim = elfi.Simulator(simulator, theta, observed=observed_data) + d = elfi.Distance('euclidean', sim) + +.. code:: ipython3 + + rej = elfi.Rejection(d, batch_size=10000, seed=123) + +Let us sample 100 parameters with ``quantile=0.01``. This means that we +sample 10000 candidate parameters from the prior distribution and take +the 100 parameters that produce simulated data closest to the observed +data. + +.. code:: ipython3 + + sample = rej.sample(100, quantile=0.01) + + +.. parsed-literal:: + + Progress [==================================================] 100.0% Complete + + +.. code:: ipython3 + + sample + + + + +.. parsed-literal:: + + Method: Rejection + Number of samples: 100 + Number of simulations: 10000 + Threshold: 6.66 + Sample means: theta: 19.6 + + + +.. code:: ipython3 + + plt.hist(sample.samples_array,range=(0,50),bins=20) + plt.xlabel('theta'); + + + +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/adaptive_distance_files/adaptive_distance_12_0.png + + +The approximate posterior sample is concentrated around +:math:`\theta=20` as expected in this example. However the sample +distribution is much wider than we would observe in case the sample was +selected based on :math:`S_1` alone. + +Now let us test adaptive distance in the same example. + +First we switch the distance node ``d`` to an adaptive distance node and +initialise adaptive distance SMC-ABC. Initialisation is identical to the +rejection sampler, and here we use the same batch size and seed as +earlier, so that the methods are presented with the exact same candidate +parameters. + +.. code:: ipython3 + + d.become(elfi.AdaptiveDistance(sim)) + +.. code:: ipython3 + + ada_smc = elfi.AdaptiveDistanceSMC(d, batch_size=10000, seed=123) + +Since this is an iterative method, we must decide both sample size +(``n_samples``) and how many populations are sampled (``rounds``). In +addition we can decide the :math:`\alpha` quantile (``quantile``) used +in estimation. + +Each population with ``n_samples`` parameter values is sampled as +follows: 1. ``n_samples/quantile`` parameters are sampled from the +current proposal distribution with acceptance threshold determined based +on the previous population and 2. the distance measure is updated based +on the observed sample and ``n_samples`` with the smallest distance are +selected as the new population. The first population is sampled from the +prior distribution and all samples are accepted in step 1. + +Here we sample one population with ``quantile=0.01``. This means that +the total simulation count will be the same as with the rejection +sampler, but now the distance function is updated based on the 10000 +simulated observations, and the 100 parameters included in the posterior +sample are selected based on the new distance measure. + +.. code:: ipython3 + + sample_ada = ada_smc.sample(100, 1, quantile=0.01) + + +.. parsed-literal:: + + ABC-SMC Round 1 / 1 + Progress [==================================================] 100.0% Complete + + +.. code:: ipython3 + + sample_ada + + + + +.. parsed-literal:: + + Method: AdaptiveDistanceSMC + Number of samples: 100 + Number of simulations: 10000 + Threshold: 0.462 + Sample means: theta: 19.8 + + + +.. code:: ipython3 + + plt.hist(sample_ada.samples_array,range=(0,50),bins=20) + plt.xlabel('theta'); + + + +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/adaptive_distance_files/adaptive_distance_19_0.png + + +We see that the posterior distribution over unknown parameter values is +narrower than in the previous example. This is because the simulator +outputs are now normalised based on their estimated standard deviation. + +We can see :math:`w_1` and :math:`w_2`: + +.. code:: ipython3 + + sample_ada.adaptive_distance_w + + + + +.. parsed-literal:: + + [array([0.06940134, 0.0097677 ])] + + + +Example 2: +---------- + +This is the normal distribution example presented in +[`1 <#Reference>`__]. + +Here we have an unknown parameter with prior distribution +:math:`\theta\sim N(0,100)` and two simulator outputs +:math:`S_1\sim N(\theta, 0.1)` and :math:`S_2\sim N(1, 1)` whose +observed values are 0. + +.. code:: ipython3 + + def simulator(mu, batch_size=1, random_state=None): + + batches_mu = np.asarray(mu).reshape((-1,1)) + + obs_1 = ss.norm.rvs(loc=batches_mu, scale=0.1, random_state=random_state).reshape((-1,1)) + obs_2 = ss.norm.rvs(loc=1, scale=1, size=batch_size, random_state=random_state).reshape((-1,1)) + + return np.hstack((obs_1, obs_2)) + +.. code:: ipython3 + + observed_data = np.array([0,0])[None,:] + +:math:`S_1` is now informative and :math:`S_2` uninformative about the +unknown parameter value, and we note that between the two output +variables, :math:`S_1` has larger variance under the prior predictive +distribution. This means that normalisation estimated based on output +data observed in the initial round or based on a separate sample would +not work well in this example. + +Let us define a new model and initialise adaptive distance SMC-ABC. + +.. code:: ipython3 + + m = elfi.new_model() + theta = elfi.Prior(ss.norm, 0, 100, model=m) + sim = elfi.Simulator(simulator, theta, observed=observed_data) + d = elfi.AdaptiveDistance(sim) + +.. code:: ipython3 + + ada_smc = elfi.AdaptiveDistanceSMC(d, batch_size=2000, seed=123) + +Next we sample 1000 parameter values in 5 rounds with the default +``quantile=0.5`` which is recommended in sequential estimation +[`1 <#Reference>`__]: + +.. code:: ipython3 + + sample_ada = ada_smc.sample(1000, 5) + + +.. parsed-literal:: + + ABC-SMC Round 1 / 5 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 2 / 5 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 3 / 5 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 4 / 5 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 5 / 5 + Progress [==================================================] 100.0% Complete + + +.. code:: ipython3 + + sample_ada + + + + +.. parsed-literal:: + + Method: AdaptiveDistanceSMC + Number of samples: 1000 + Number of simulations: 32000 + Threshold: 0.925 + Sample means: theta: -0.195 + + + +.. code:: ipython3 + + plt.hist(sample_ada.samples_array, range=(-25,25), bins=20) + plt.xlabel(theta); + + + +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/adaptive_distance_files/adaptive_distance_31_0.png + + +The sample distribution is concentrated around :math:`\theta=0` but +wider than could be expected. However we can continue the iterative +estimation process. Here we sample two more populations: + +.. code:: ipython3 + + sample_ada = ada_smc.sample(1000, 2) + + +.. parsed-literal:: + + ABC-SMC Round 6 / 7 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 7 / 7 + Progress [==================================================] 100.0% Complete + + +.. code:: ipython3 + + sample_ada + + + + +.. parsed-literal:: + + Method: AdaptiveDistanceSMC + Number of samples: 1000 + Number of simulations: 48000 + Threshold: 0.868 + Sample means: theta: 0.0183 + + + +.. code:: ipython3 + + plt.hist(sample_ada.samples_array, range=(-25,25), bins=20) + plt.xlabel('theta'); + + + +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/adaptive_distance_files/adaptive_distance_35_0.png + + +We observe that the sample mean is now closer to zero and the sample +distribution is narrower. + +Let us examine :math:`w_1` and :math:`w_2`: + +.. code:: ipython3 + + sample_ada.adaptive_distance_w + + + + +.. parsed-literal:: + + [array([0.01023228, 1.00584519]), + array([0.00921258, 0.99287166]), + array([0.01201937, 0.99365522]), + array([0.02217631, 0.98925365]), + array([0.04355987, 1.00076738]), + array([0.07863284, 0.9971017 ]), + array([0.13892778, 1.00929049])] + + + +We can see that :math:`w_2` (second column) is constant across +iterations whereas :math:`w_1` increases as the method learns more about +possible parameter values and the proposal distribution becomes more +concentrated around :math:`\theta=0`. + +Notes +----- + +The adaptive distance SMC-ABC method demonstrated in this notebook +normalises simulator outputs or summaries calculated based on simulator +output based on their estimated standard deviation under the proposal +distribution in each iteration. This ensures that all outputs or +summaries have an equal contribution to the distance between simulated +and observed data in all iterations. + +It is important to note that the method does not evaluate whether +outputs or summaries are needed or informative. In both examples studied +in this notebook, results would improve if inference was carried out +based on :math:`S_1` alone. Hence one should choose the summaries used +in adaptive distance SMC-ABC with the usual care. ELFI tools that aid in +the selection process are discussed in the diagnostics notebook +available `here `__. + +Reference +--------- + +[1] Prangle D (2017). Adapting the ABC Distance Function. Bayesian +Analysis 12(1): 289-309, 2017. +https://projecteuclid.org/euclid.ba/1460641065 diff --git a/docs/usage/adaptive_threshold_selection.rst b/docs/usage/adaptive_threshold_selection.rst new file mode 100644 index 00000000..6013a5c6 --- /dev/null +++ b/docs/usage/adaptive_threshold_selection.rst @@ -0,0 +1,186 @@ +Adaptive Approximate Bayesian Computation Tolerance Selection +------------------------------------------------------------- + +To use ABC-SMC one has to choose the tolerance threshold or acceptance +quantiles beforehand to run the method. However, Simola et al. [1] +describe an adaptive strategy for selecting the threshold based on +density ratios of rejection ABC posteriors. This tutorial teaches you +how to use the adaptive threshold selection method. + +.. code:: ipython3 + + import numpy as np + # from scipy.stats import norm + import scipy.stats as ss + import elfi + import logging + import matplotlib + import matplotlib.pyplot as plt + + from scipy.stats import gaussian_kde + + %matplotlib inline + + # Set an arbitrary global seed to keep the randomly generated quantities the same + seed = 10 + np.random.seed(seed) + + + +We reproduce the Example 1 from [1] as a test case for +``AdaptiveThresholdSMC``. + +.. code:: ipython3 + + def gaussian_mixture(theta, batch_size=1, random_state=None): + sigma1 = 1 + sigma2 = np.sqrt(0.01) + sigmas = np.array((sigma1, sigma2)) + mixture_prob = 0.5 + random_state = random_state or np.random + + scale_array = random_state.choice(sigmas, + size=batch_size, + replace=True, + p=np.array((mixture_prob, 1-mixture_prob))) + observation = ss.norm.rvs(loc=theta, + scale=scale_array, + size=batch_size, + random_state=random_state) + + return observation + + +.. code:: ipython3 + + yobs = 0 + model = elfi.ElfiModel() + elfi.Prior('uniform', -10, 20, name='theta', model=model) + elfi.Simulator(gaussian_mixture, model['theta'], observed=yobs, name='GM') + elfi.Distance('euclidean', model['GM'], name='d'); + +.. code:: ipython3 + + smc = elfi.SMC(model['d'], batch_size=500, seed=1) + thresholds = [1., 0.5013, 0.2519, 0.1272, 0.0648, 0.0337, 0.0181, 0.0102, 0.0064, 0.0025] + smc_samples = smc.sample(1000, thresholds=thresholds) + + +.. parsed-literal:: + + ABC-SMC Round 1 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 2 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 3 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 4 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 5 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 6 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 7 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 8 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 9 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 10 / 10 + Progress [==================================================] 100.0% Complete + + +Adaptive threshold selection ABC (``elfi.AdaptiveThresholdSMC``) can be +used in similar fashion as ``elfi.SMC``. One does not need to provide a +list of thresholds but user can set densityratio-based termination +condition (``q_threshold``) and a limit for the number of iterations +(``max_iter``). + +.. code:: ipython3 + + adaptive_smc = elfi.AdaptiveThresholdSMC(model['d'], batch_size=500, seed=2, q_threshold=0.995) + adaptive_smc_samples = adaptive_smc.sample(1000, max_iter=10) + + +.. parsed-literal:: + + ABC-SMC Round 1 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 2 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 3 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 4 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 5 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 6 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 7 / 10 + Progress [==================================================] 100.0% Complete + ABC-SMC Round 8 / 10 + Progress [==================================================] 100.0% Complete + + +We compare visually the approximated posterior and the true posterior, +which in this case is available. + +.. code:: ipython3 + + def gaussian_mixture_density(theta, sigma_1=1, sigma_2=0.1): + y = 0.5 * ss.norm.pdf(theta, loc=0, scale=sigma_1) + 0.5 * ss.norm.pdf(theta, loc=0, scale=sigma_2) + return y + +.. code:: ipython3 + + print(smc_samples) + print(adaptive_smc_samples) + + +.. parsed-literal:: + + Method: SMC + Number of samples: 1000 + Number of simulations: 1352000 + Threshold: 0.0025 + Sample means: theta: 0.0181 + + Method: AdaptiveThresholdSMC + Number of samples: 1000 + Number of simulations: 49500 + Threshold: 0.236 + Sample means: theta: -0.042 + + + +We compute Kernel density estimates of the posteriors based on the +approximate posterior samples and visualise them in a density plot. + +.. code:: ipython3 + + smc_posteriorpdf = gaussian_kde(smc_samples.samples_array[:,0]) + adaptive_smc_posteriorpdf = gaussian_kde(adaptive_smc_samples.samples_array[:,0]) + + reference_posteriorpdf = gaussian_mixture_density + + xs = np.linspace(-3,3,200) + smc_posteriorpdf.covariance_factor = lambda : .25 + smc_posteriorpdf._compute_covariance() + adaptive_smc_posteriorpdf.covariance_factor = lambda : .25 + adaptive_smc_posteriorpdf._compute_covariance() + plt.figure(figsize=(16,10)) + plt.plot(xs,smc_posteriorpdf(xs)) + plt.plot(xs,adaptive_smc_posteriorpdf(xs)) + plt.plot(xs,reference_posteriorpdf(xs)) + plt.legend(('abc-smc', 'adaptive abc-smc', 'reference')); + + + +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/adaptive_threshold_selection_files/adaptive_threshold_selection_12_0.png + + + +[1] Simola, U., Cisewski-Kehe, J., Gutmann, M.U. and Corander, J. +`Adaptive Approximate Bayesian Computation Tolerance +Selection `__, +Bayesian Analysis 1(1):1-27, 2021 diff --git a/docs/usage/external.rst b/docs/usage/external.rst index b1552476..ecf28f7a 100644 --- a/docs/usage/external.rst +++ b/docs/usage/external.rst @@ -283,7 +283,7 @@ information. That will be available under the ``meta`` keyword (see the -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/external_files/external_21_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/external_files/external_21_0.svg @@ -344,7 +344,7 @@ inference for each of them: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/external_files/external_25_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/external_files/external_25_0.svg @@ -406,7 +406,7 @@ inference for each of them: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/external_files/external_27_1.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/external_files/external_27_1.png Interfacing with R @@ -480,7 +480,7 @@ Load a ready made MA2 model: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/external_files/external_36_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/external_files/external_36_0.svg @@ -570,7 +570,7 @@ Load a ready made MA2 model: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/external_files/external_48_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/external_files/external_48_0.svg diff --git a/docs/usage/parallelization.rst b/docs/usage/parallelization.rst index 52f6f770..46b41100 100644 --- a/docs/usage/parallelization.rst +++ b/docs/usage/parallelization.rst @@ -41,7 +41,7 @@ Let’s get the model and plot it (requires graphviz) -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/parallelization_files/parallelization_5_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/parallelization_files/parallelization_5_0.svg @@ -95,7 +95,7 @@ And that is it. The result object is also just like in the basic case: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/parallelization_files/parallelization_11_1.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/parallelization_files/parallelization_11_1.png Note that for reproducibility a reference to the activated client is diff --git a/docs/usage/tutorial.rst b/docs/usage/tutorial.rst index 2b2c25d4..a8322132 100644 --- a/docs/usage/tutorial.rst +++ b/docs/usage/tutorial.rst @@ -117,7 +117,7 @@ observed data alone. -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_11_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_11_0.png Approximate Bayesian Computation @@ -247,7 +247,7 @@ a DAG. -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_28_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_28_0.svg @@ -312,7 +312,7 @@ These indeed sample from a triangle: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_36_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_36_0.png Let’s change the earlier priors to the new ones in the inference model: @@ -327,7 +327,7 @@ Let’s change the earlier priors to the new ones in the inference model: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_38_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_38_0.svg @@ -387,7 +387,7 @@ time is spent in drawing. -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_45_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_45_0.png @@ -743,7 +743,7 @@ For example one can plot the marginal distributions: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_77_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_77_0.png Often “pairwise relationships” are more informative: @@ -754,7 +754,7 @@ Often “pairwise relationships” are more informative: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_79_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_79_0.png Note that if working in a non-interactive environment, you can use @@ -904,15 +904,15 @@ Or just the means: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_92_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_92_0.png -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_92_1.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_92_1.png -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_92_2.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_92_2.png Obviously one still has direct access to the samples as well, which @@ -935,7 +935,7 @@ allows custom plotting: -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_94_0.png +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_94_0.png It can be seen that the populations iteratively concentrate more and @@ -967,7 +967,7 @@ operations. -.. image:: http://research.cs.aalto.fi/pml/software/elfi/docs/0.6.2/usage/tutorial_files/tutorial_100_0.svg +.. image:: https://raw.githubusercontent.com/elfi-dev/notebooks/dev/figures/tutorial_files/tutorial_100_0.svg diff --git a/elfi/__init__.py b/elfi/__init__.py index 9b6ca185..2ebe32e5 100644 --- a/elfi/__init__.py +++ b/elfi/__init__.py @@ -28,4 +28,4 @@ __email__ = 'elfi-support@hiit.fi' # make sure __version_ is on the last non-empty line (read by setup.py) -__version__ = '0.7.7' +__version__ = '0.8.0'