diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 00000000..068ea3fa
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,42 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: pytest
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        python -m pip install numpy
+        if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
+        python -m pip install -e .
+
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=99 --statistics
+    - name: Test with pytest
+      run: |
+        make test
diff --git a/.gitignore b/.gitignore
index 2f0a619d..8ef0ebb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ var/
 
 # Images
 *.png
+!elfi_logo_text_nobg.png
 *.svg
 *.jpg
 *.jpeg
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 16a3256a..384516d8 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -19,6 +19,6 @@ formats:
 
 # Optionally set the version of Python and requirements required to build your docs
 python:
-  version: 3.5
+  version: 3.6
   install:
     - requirements: requirements.txt
\ No newline at end of file
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5ec2d321..bf0f608c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,5 +1,20 @@
 Changelog
 =========
+
+0.8.1 (2021-10-12)
+------------------
+- Relax the tightly pinned dependency on a version of dask[distributed]
+- Change lotka-volterra priors to follow the given reference
+- Fix README.md badges
+- Fix a few small issues with CONTRIBUTING.rst
+- Add Github Actions based CI workflow
+- Add the skeleton of Testbench functionality for comparing methods
+- Fix a bug where plot_traces() did not work with only one chain
+- Fix histograms on pair_plot diagonals and improve their visual appearance
+- Improve axes creation and the visual appearance of plots
+- Fix a bug where precomputed evidence size was not taken into account when reporting BOLFI results
+- Fix a bug where observable nodes were not colored gray when using `elfi.draw`
+- Add `plot_predicted_summaries` in visualization.py
 
 0.8.0 (2021-03-29)
 ------------------
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 61735334..4ba48a87 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -63,13 +63,16 @@ Get Started!
 Ready to contribute? Here's how to set up `ELFI` for local development.
 
 1. Fork the `elfi` repo on GitHub.
-2. Clone your fork locally::
+2. Clone your fork locally and add the base repository as a remote::
 
-    $ git clone git@github.com:your_name_here/elfi.git
+    $ git clone git@github.com:your_github_handle_here/elfi.git
+    $ cd elfi
+    $ git remote add upstream git@github.com:elfi-dev/elfi.git
 
 3. Make sure you have `Python 3 `_ and
    `Anaconda Distribution `_ installed on your
-   machine. Check your conda and Python versions::
+   machine. Check your conda and Python versions. Currently supported Python versions
+   are 3.6, 3.7 and 3.8::
 
     $ conda -V
     $ python -V
@@ -106,9 +109,17 @@ Ready to contribute? Here's how to set up `ELFI` for local development.
 
     $ git add .
    $ git commit -m "Your detailed description of your changes."
+
+9. After committing your changes, you may sync with the base repository if there have been changes::
+
+    $ git fetch upstream
+    $ git rebase upstream/dev
+
+10. Push the changes::
+
     $ git push origin name-of-your-bugfix-or-feature
 
-9. Submit a pull request through the GitHub website.
+11. Submit a pull request through the GitHub website.
 
 Style Guidelines
 ----------------
@@ -123,12 +134,12 @@ Pull Request Guidelines
 
 Before you submit a pull request, check that it meets these guidelines:
 
 1. The pull request should include tests that will be run automatically using
-   Travis-CI.
+   Github Actions.
 2. If the pull request adds functionality, the docs should be updated. Put
    your new functionality into a function with a docstring, and add the
-   feature to the list in README.rst.
-3. The pull request should work for Python 3.6 and later. Check
-   https://travis-ci.org/elfi-dev/elfi/pull_requests
+   feature to the list in README.md.
+3. The pull request should work for Python 3.6 or later. Check
+   https://github.com/elfi-dev/elfi/actions/workflows/pytest.yml
    and make sure that the tests pass for all supported Python versions.
 
 Tips
diff --git a/README.md b/README.md
index 6a89da87..f6e53f01 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
-**Version 0.8.0 released!** See the [CHANGELOG](CHANGELOG.rst) and [notebooks](https://github.com/elfi-dev/notebooks).
+**Version 0.8.1 released!** See the [CHANGELOG](CHANGELOG.rst) and [notebooks](https://github.com/elfi-dev/notebooks).
+
+<img src="https://raw.githubusercontent.com/elfi-dev/elfi/dev/docs/logos/elfi_logo_text_nobg.png" width="200" />
 
 ELFI - Engine for Likelihood-Free Inference
 ===========================================
 
-[![Build Status](https://travis-ci.org/elfi-dev/elfi.svg?branch=master)](https://travis-ci.org/elfi-dev/elfi)
-[![Code Health](https://landscape.io/github/elfi-dev/elfi/dev/landscape.svg?style=flat)](https://landscape.io/github/elfi-dev/elfi/dev)
+[![Build Status](https://github.com/elfi-dev/elfi/actions/workflows/pytest.yml/badge.svg)](https://github.com/elfi-dev/elfi/actions)
 [![Documentation Status](https://readthedocs.org/projects/elfi/badge/?version=latest)](http://elfi.readthedocs.io/en/latest/?badge=latest)
 [![Gitter](https://badges.gitter.im/elfi-dev/elfi.svg)](https://gitter.im/elfi-dev/elfi?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
 [![DOI](https://zenodo.org/badge/69855441.svg)](https://zenodo.org/badge/latestdoi/69855441)
diff --git a/docs/logos/elfi_logo_text_nobg.png b/docs/logos/elfi_logo_text_nobg.png
new file mode 100644
index 00000000..b5dba44b
Binary files /dev/null and b/docs/logos/elfi_logo_text_nobg.png differ
diff --git a/elfi/__init__.py b/elfi/__init__.py
index 2ebe32e5..9d015b6e 100644
--- a/elfi/__init__.py
+++ b/elfi/__init__.py
@@ -20,12 +20,14 @@
 from elfi.model.elfi_model import *
 from elfi.model.extensions import ScipyLikeDistribution as Distribution
 from elfi.store import OutputPool, ArrayPool
+from elfi.testbench.testbench import Testbench, TestbenchMethod
 from elfi.visualization.visualization import nx_draw as draw
 from elfi.visualization.visualization import plot_params_vs_node
+from elfi.visualization.visualization import plot_predicted_summaries
 from elfi.methods.bo.gpy_regression import GPyRegression
 
 __author__ = 'ELFI authors'
 __email__ = 'elfi-support@hiit.fi'
 
 # make sure __version_ is on the last non-empty line (read by setup.py)
-__version__ = '0.8.0'
+__version__ = '0.8.1'
diff --git a/elfi/examples/lotka_volterra.py b/elfi/examples/lotka_volterra.py
index c09cf88a..0d17ff44 100644
--- a/elfi/examples/lotka_volterra.py
+++ b/elfi/examples/lotka_volterra.py
@@ -170,9 +170,9 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, **kwargs):
     priors = []
     sumstats = []
 
-    priors.append(elfi.Prior(ExpUniform, -2, 0, model=m, name='r1'))
-    priors.append(elfi.Prior(ExpUniform, -5, -2.5, model=m, name='r2'))  # easily kills populations
-    priors.append(elfi.Prior(ExpUniform, -2, 0, model=m, name='r3'))
+    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r1'))
+    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r2'))
+    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r3'))
     priors.append(elfi.Prior('poisson', 50, model=m, name='prey0'))
     priors.append(elfi.Prior('poisson', 100, model=m, name='predator0'))
     priors.append(elfi.Prior(ExpUniform, np.log(0.5), np.log(50), model=m, name='sigma'))
diff --git a/elfi/methods/inference/bolfi.py b/elfi/methods/inference/bolfi.py
index 2245b2d3..628444e3 100644
--- a/elfi/methods/inference/bolfi.py
+++ b/elfi/methods/inference/bolfi.py
@@ -587,5 +587,5 @@ def sample(self,
             parameter_names=self.parameter_names,
             warmup=warmup,
             threshold=float(posterior.threshold),
-            n_sim=self.state['n_sim'],
+            n_sim=self.state['n_evidence'],
             seed=self.seed)
diff --git a/elfi/methods/mcmc.py b/elfi/methods/mcmc.py
index 569ac1ec..60faf10b 100644
--- a/elfi/methods/mcmc.py
+++ b/elfi/methods/mcmc.py
@@ -127,7 +127,7 @@ def nuts(n_iter,
     No-U-Turn Sampler, an improved version of the Hamiltonian (Markov Chain) Monte Carlo sampler.
 
     Based on Algorithm 6 in
-    Hoffman & Gelman, depthMLR 15, 1351-1381, 2014.
Based on Algorithm 6 in - Hoffman & Gelman, depthMLR 15, 1351-1381, 2014. + Hoffman & Gelman, JMLR 15, 1593-1623, 2014. Parameters ---------- diff --git a/elfi/testbench/testbench.py b/elfi/testbench/testbench.py new file mode 100644 index 00000000..113ed664 --- /dev/null +++ b/elfi/testbench/testbench.py @@ -0,0 +1,300 @@ +"""This module implements testbench-functionality in elfi.""" + +import logging + +import numpy as np + +from elfi.visualization.visualization import ProgressBar + +logger = logging.getLogger(__name__) + +__all__ = ['Testbench', 'TestbenchMethod'] + + +class Testbench: + """Base class for comparing the performance of LFI-methods. + + One elfi.Model can be inferred `repetitions`-times with + each of the methods included in `method_list`. + + Attributes + ---------- + model : elfi.Model + elfi.Model which is inferred. + method_list : list + List of elfi-inference methods. + repetitions : int + How many repetitions of models is included in the testbench. + seed : int, optional + + + """ + + def __init__(self, + model=None, + repetitions=1, + observations=None, + reference_parameter=None, + reference_posterior=None, + progress_bar=True, + seed=None): + """Construct the testbench object. + + Parameters + ---------- + model : elfi.Model + elfi.Model which is inferred. + repetitions : int + How many repetitions of models is included in the testbench. + observation : np.array, optional + Observation, if available. + reference_parameter : dictionary, optional + True parameter values if available. + reference_posterior : np.array, optional + A sample from a reference posterior. + progress_bar : boolean + Indicate whether to display testbench progressbar. + seed : int, optional + + """ + # TODO: Resolve the situation when the name of the method to be added already exists. 
+        self.model = model
+        self.method_list = []
+        self.method_seed_list = []
+        self.repetitions = repetitions
+        self.rng = np.random.RandomState(seed)
+
+        if observations is not None:
+            self.observations = observations.copy()
+        else:
+            self.observations = observations
+
+        if reference_parameter is not None:
+            self.reference_parameter = reference_parameter.copy()
+        else:
+            self.reference_parameter = reference_parameter
+
+        self.param_dim = len(model.parameter_names)
+        self.param_names = model.parameter_names
+        # TODO: Add functionality to deal with a reference posterior
+        self.reference_posterior = reference_posterior
+        self.simulator_name = list(model.observed)[0]
+        if progress_bar:
+            self.progress_bar = ProgressBar(prefix='Progress', suffix='Complete',
+                                            decimals=1, length=50, fill='=')
+        else:
+            self.progress_bar = None
+
+        self._resolve_test_type()
+        self._collect_tests()
+
+    def _collect_tests(self):
+        self.test_dictionary = {
+            'model': self.model,
+            'observations': self.observations,
+            'reference_parameter': self.reference_parameter,
+            'reference_posterior': self.reference_posterior
+        }
+
+    def _get_seeds(self, n_rep=1):
+        """Fix a seed for each of the repeated instances."""
+        upper_limit = 2 ** 32 - 1
+        return self.rng.randint(
+            low=0,
+            high=upper_limit,
+            size=n_rep,
+            dtype=np.uint32)
+
+    def _resolve_test_type(self):
+        self._set_default_test_type()
+        # Reference parameters must be resolved first: generating observations
+        # below uses them as the simulator input values.
+        self._resolve_reference_parameters()
+        self._resolve_observations()
+
+    def _set_default_test_type(self):
+        self.description = {
+            'observations_available': self.observations is not None,
+            'reference_parameters_available': self.reference_parameter is not None,
+            'reference_posterior_available': self.reference_posterior is not None
+        }
+
+    def _resolve_reference_parameters(self):
+        if self.description['reference_parameters_available']:
+            for key, value in self.reference_parameter.items():
+                self.reference_parameter[key] = np.repeat(
+                    value,
+                    repeats=self.repetitions
+                )
+
+        elif not self.description['observations_available']:
+            seed = self._get_seeds(n_rep=1)
+            self.reference_parameter = self.model.generate(
+                batch_size=self.repetitions,
+                outputs=self.model.parameter_names,
+                seed=seed[0])
+
+    def _resolve_observations(self):
+        if self.description['observations_available']:
+            self.observations = np.repeat(
+                self.observations,
+                repeats=self.repetitions,
+                axis=0)
+        else:
+            seed = self._get_seeds(n_rep=1)
+            self.observations = self.model.generate(
+                with_values=self.reference_parameter,
+                outputs=self.simulator_name,
+                batch_size=self.repetitions,
+                seed=seed[0])[self.simulator_name]
+
+    def add_method(self, new_method):
+        """Add a new method to the testbench.
+
+        Parameters
+        ----------
+        new_method : TestbenchMethod
+            An inference method as a TestbenchMethod.
+
+        """
+        logger.info('Adding {} to testbench.'.format(new_method.attributes['name']))
+        self.method_list.append(new_method)
+        self.method_seed_list.append(self._get_seeds(n_rep=self.repetitions))
+
+    def run(self):
+        """Run the testbench."""
+        self.testbench_results = []
+        for method_index, method in enumerate(self.method_list):
+            logger.info('Running {} in testbench.'.format(method.attributes['name']))
+
+            if self.progress_bar:
+                self.progress_bar.reinit_progressbar(reinit_msg=method.attributes['name'])
+
+            self.testbench_results.append(
+                self._repeat_inference(method, self.method_seed_list[method_index])
+            )
+
+    def _repeat_inference(self, method, seed_list):
+        repeated_result = []
+        model = self.model.copy()
+        for i in np.arange(self.repetitions):
+            if self.progress_bar:
+                self.progress_bar.update_progressbar(i + 1, self.repetitions)
+
+            model.observed[self.simulator_name] = np.atleast_2d(self.observations[i])
+
+            repeated_result.append(
+                self._draw_posterior_sample(method, model, seed_list[i])
+            )
+
+        return self._collect_results(
+            method.attributes['name'],
+            repeated_result)
+
+    def _draw_posterior_sample(self, method, model, seed):
+        method_instance = method.attributes['callable'](
+            model,
+            **method.attributes['method_kwargs'],
+            seed=seed)
+
+        fit_kwargs = method.attributes['fit_kwargs']
+
+        if len(fit_kwargs) > 0:
+            method_instance.fit(**fit_kwargs)
+
+        sampler_kwargs = method.attributes['sample_kwargs']
+
+        return method_instance.sample(**sampler_kwargs)
+
+    def _collect_results(self, name, results):
+        result_dictionary = {
+            'method': name,
+            'results': results
+        }
+        return result_dictionary
+
+    # TODO
+    def _compare_sample_results(self):
+        """Compare results in sample-format."""
+
+    # TODO
+    def _retrodiction(self):
+        """Infer a problem with known parameter values."""
+
+    def get_testbench_results(self):
+        """Return testbench test cases and results."""
+        testbench_data = {
+            'testcases': self.test_dictionary,
+            'results': self.testbench_results
+        }
+        return testbench_data
+
+    def parameterwise_sample_mean_differences(self):
+        """Return parameterwise sample mean differences for methods in the testbench."""
+        sample_mean_difference_results = {}
+        for method_results in self.testbench_results:
+            sample_mean_difference_results[method_results['method']] = (
+                self._get_sample_mean_difference(method_results)
+            )
+
+        return sample_mean_difference_results
+
+    def _get_sample_mean_difference(self, method):
+        sample_mean_difference = {}
+        for param_name in self.param_names:
+            sample_mean_difference[param_name] = [
+                results.sample_means[param_name] - self.reference_parameter[param_name][0]
+                for results in method['results']
+            ]
+
+        return sample_mean_difference
+
+
+class TestbenchMethod:
+    """Container for ParameterInference methods included in a testbench."""
+
+    def __init__(self,
+                 method,
+                 method_kwargs=None,
+                 fit_kwargs=None,
+                 sample_kwargs=None,
+                 name=None):
+        """Construct the TestbenchMethod container.
+
+        Parameters
+        ----------
+        method : elfi.ParameterInference
+            elfi.ParameterInference method which is included in the testbench.
+        method_kwargs : dict, optional
+            Options of the elfi.ParameterInference constructor.
+        fit_kwargs : dict, optional
+            Options of the elfi.ParameterInference.fit method.
+        sample_kwargs : dict, optional
+            Options of the elfi.ParameterInference.sample method.
+        name : string, optional
+            Name used in the testbench.
+
+        """
+        # Avoid shared mutable default arguments.
+        method_kwargs = method_kwargs or {}
+        fit_kwargs = fit_kwargs or {}
+        sample_kwargs = sample_kwargs or {}
+        name = name or method.__name__
+        self.attributes = {'callable': method,
+                           'method_kwargs': method_kwargs,
+                           'fit_kwargs': fit_kwargs,
+                           'sample_kwargs': sample_kwargs,
+                           'name': name}
+
+    def set_method_kwargs(self, **kwargs):
+        """Add options for the ParameterInference constructor."""
+        logger.info("Setting options for {}".format(self.attributes['name']))
+        self.attributes['method_kwargs'] = kwargs
+
+    def set_fit_kwargs(self, **kwargs):
+        """Add options for the ParameterInference method fit()."""
+        logger.info("Setting surrogate fit options for {}".format(self.attributes['name']))
+        self.attributes['fit_kwargs'] = kwargs
+
+    def set_sample_kwargs(self, **kwargs):
+        """Add options for the ParameterInference method sample()."""
+        logger.info("Setting sampler options for {}".format(self.attributes['name']))
+        self.attributes['sample_kwargs'] = kwargs
+
+    def get_method(self):
+        """Return TestbenchMethod attributes."""
+        return self.attributes
diff --git a/elfi/visualization/visualization.py b/elfi/visualization/visualization.py
index 670cf698..72c02b6e 100644
--- a/elfi/visualization/visualization.py
+++ b/elfi/visualization/visualization.py
@@ -53,7 +53,7 @@ def nx_draw(G, internal=False, param_names=False, filename=None, format=None):
             hidden.add(n)
             continue
         _format = {'shape': 'circle', 'fillcolor': 'gray80', 'style': 'solid'}
-        if state.get('_observable'):
+        if state['attr_dict'].get('_observable'):
             _format['style'] = 'filled'
         dot.node(n, **_format)
 
@@ -89,7 +89,7 @@ def _create_axes(axes, shape, **kwargs):
 
     """
     fig_kwargs = {}
-    kwargs['figsize'] = kwargs.get('figsize', (16, 4 * shape[0]))
+    kwargs['figsize'] = kwargs.get('figsize', (4 * shape[1], 4 * shape[0]))
     for k in ['figsize', 'sharex', 'sharey', 'dpi', 'num']:
         if k in kwargs.keys():
             fig_kwargs[k] = kwargs.pop(k)
@@ -98,8 +98,10 @@ def _create_axes(axes, shape, **kwargs):
         axes = np.atleast_2d(axes)
     else:
         fig, axes = plt.subplots(ncols=shape[1], nrows=shape[0], **fig_kwargs)
-        axes = np.atleast_2d(axes)
-        fig.tight_layout(pad=2.0)
+        axes = np.reshape(axes, shape)
+        fig.tight_layout(pad=2.0, h_pad=1.08, w_pad=1.08)
+        fig.subplots_adjust(wspace=0.2, hspace=0.2)
+
     return axes, kwargs
 
@@ -157,7 +159,13 @@ def plot_marginals(samples, selector=None, bins=20, axes=None, **kwargs):
     return axes
 
 
-def plot_pairs(samples, selector=None, bins=20, axes=None, **kwargs):
+def plot_pairs(samples,
+               selector=None,
+               bins=20,
+               reference_value=None,
+               axes=None,
+               draw_upper_triagonal=False,
+               **kwargs):
     """Plot pairwise relationships as a matrix with marginals on the diagonal.
 
     The y-axis of marginal histograms are scaled.
@@ -169,7 +177,11 @@
         Indices or keys to use from samples. Default to all.
     bins : int, optional
         Number of bins in histograms.
+    reference_value : dict, optional
+        Dictionary containing reference values for parameters.
     axes : one or an iterable of plt.Axes, optional
+    draw_upper_triagonal : boolean, optional
+        Whether to also draw the symmetric upper-triangular part.
 
     Returns
     -------
@@ -178,7 +190,7 @@
     """
     samples = _limit_params(samples, selector)
     shape = (len(samples), len(samples))
-    edgecolor = kwargs.pop('edgecolor', 'none')
+    edgecolor = kwargs.pop('edgecolor', 'black')
     dot_size = kwargs.pop('s', 2)
     axes, kwargs = _create_axes(axes, shape, **kwargs)
 
@@ -187,22 +199,46 @@
         max_samples = samples[key_row].max()
         for idx_col, key_col in enumerate(samples):
             if idx_row == idx_col:
-                # create a histogram with scaled y-axis
-                hist, bin_edges = np.histogram(samples[key_row], bins=bins)
-                bar_width = bin_edges[1] - bin_edges[0]
-                hist = (hist - hist.min()) * (max_samples - min_samples) / (
-                    hist.max() - hist.min())
-                axes[idx_row, idx_col].bar(bin_edges[:-1],
-                                           hist,
-                                           bar_width,
-                                           bottom=min_samples,
-                                           **kwargs)
+                axes[idx_row, idx_col].hist(samples[key_row], bins=bins, density=True, **kwargs)
+                if reference_value is not None:
+                    axes[idx_row, idx_col].plot(
+                        reference_value[key_row], 0,
+                        color='red',
+                        alpha=1.0,
+                        linewidth=2,
+                        marker='X',
+                        clip_on=False,
+                        markersize=12)
+                axes[idx_row, idx_col].get_yaxis().set_ticklabels([])
+                axes[idx_row, idx_col].set(xlim=(min_samples, max_samples))
             else:
-                axes[idx_row, idx_col].scatter(samples[key_col],
-                                               samples[key_row],
-                                               s=dot_size,
-                                               edgecolor=edgecolor,
-                                               **kwargs)
+                if (idx_row > idx_col) or draw_upper_triagonal:
+                    axes[idx_row, idx_col].plot(samples[key_col],
+                                                samples[key_row],
+                                                linestyle='',
+                                                marker='o',
+                                                alpha=0.6,
+                                                clip_on=False,
+                                                markersize=dot_size,
+                                                markeredgecolor=edgecolor,
+                                                **kwargs)
+                    if reference_value is not None:
+                        axes[idx_row, idx_col].plot(
+                            [samples[key_col].min(), samples[key_col].max()],
+                            [reference_value[key_row], reference_value[key_row]],
+                            color='red', alpha=0.8, linewidth=2)
+                        axes[idx_row, idx_col].plot(
+                            [reference_value[key_col], reference_value[key_col]],
+                            [samples[key_row].min(), samples[key_row].max()],
+                            color='red', alpha=0.8, linewidth=2)
+
+                    axes[idx_row, idx_col].axis([samples[key_col].min(),
+                                                 samples[key_col].max(),
+                                                 samples[key_row].min(),
+                                                 samples[key_row].max()])
+                else:
+                    if idx_row < idx_col:
+                        axes[idx_row, idx_col].axis('off')
 
     axes[idx_row, 0].set_ylabel(key_row)
     axes[-1, idx_row].set_xlabel(key_row)
@@ -250,92 +286,6 @@ def plot_traces(result, selector=None, axes=None, **kwargs):
     return axes
 
 
-class ProgressBar:
-    """Progress bar monitoring the inference process.
-
-    Attributes
-    ----------
-    prefix : str, optional
-        Prefix string
-    suffix : str, optional
-        Suffix string
-    decimals : int, optional
-        Positive number of decimals in percent complete
-    length : int, optional
-        Character length of bar
-    fill : str, optional
-        Bar fill character
-    scaling : int, optional
-        Integer used to scale current iteration and total iterations of the progress bar
-
-    """
-
-    def __init__(self, prefix='', suffix='', decimals=1, length=100, fill='='):
-        """Construct progressbar for monitoring.
-
-        Parameters
-        ----------
-        prefix : str, optional
-            Prefix string
-        suffix : str, optional
-            Suffix string
-        decimals : int, optional
-            Positive number of decimals in percent complete
-        length : int, optional
-            Character length of bar
-        fill : str, optional
-            Bar fill character
-
-        """
-        self.prefix = prefix
-        self.suffix = suffix
-        self.decimals = 1
-        self.length = length
-        self.fill = fill
-        self.scaling = 0
-        self.finished = False
-
-    def update_progressbar(self, iteration, total):
-        """Print updated progress bar in console.
-
-        Parameters
-        ----------
-        iteration : int
-            Integer indicating completed iterations
-        total : int
-            Integer indicating total number of iterations
-
-        """
-        if iteration >= total:
-            percent = ("{0:." + str(self.decimals) + "f}").\
-                format(100.0)
-            bar = self.fill * self.length
-            if not self.finished:
-                print('%s [%s] %s%% %s' % (self.prefix, bar, percent, self.suffix))
-                self.finished = True
-        elif total - self.scaling > 0:
-            percent = ("{0:." + str(self.decimals) + "f}").\
-                format(100 * ((iteration - self.scaling) / float(total - self.scaling)))
-            filled_length = int(self.length * (iteration - self.scaling) // (total - self.scaling))
-            bar = self.fill * filled_length + '-' * (self.length - filled_length)
-            print('%s [%s] %s%% %s' % (self.prefix, bar, percent, self.suffix), end='\r')
-
-    def reinit_progressbar(self, scaling=0, reinit_msg=""):
-        """Reinitialize new round of progress bar.
-
-        Parameters
-        ----------
-        scaling : int, optional
-            Integer used to scale current and total iterations of the progress bar
-        reinit_msg : str, optional
-            Message printed before restarting an empty progess bar on a new line
-
-        """
-        self.scaling = scaling
-        self.finished = False
-        print(reinit_msg)
-
-
 def plot_params_vs_node(node, n_samples=100, func=None, seed=None, axes=None, **kwargs):
     """Plot some realizations of parameters vs. `node`.
 
@@ -489,15 +439,11 @@
     const = x_evidence[np.argmin(y_evidence), :]
     bounds = bounds or gp.bounds
-    cmap = plt.cm.get_cmap("bone")
-
-    plt.subplots_adjust(wspace=0.2, hspace=0.0, left=0.3, right=0.7, top=0.8, bottom=0.05)
+    cmap = plt.cm.get_cmap("Blues")
 
     for ix in range(n_plots):
         for jy in range(n_plots):
             if ix == jy:
-                axes[jy, ix].scatter(x_evidence[:, ix], y_evidence)
-                axes[jy, ix].set_aspect(aspect=(bounds[ix][1] - bounds[ix][0]) /
-                                        (max(y_evidence) - min(y_evidence)))
+                axes[jy, ix].scatter(x_evidence[:, ix], y_evidence, edgecolors='black', alpha=0.6)
                 axes[jy, ix].get_yaxis().set_ticklabels([])
                 axes[jy, ix].yaxis.tick_right()
                 axes[jy, ix].set_ylabel('Discrepancy')
@@ -507,7 +453,7 @@
                     axes[jy, ix].plot([true_params[parameter_names[ix]],
                                        true_params[parameter_names[ix]]],
                                       [min(y_evidence), max(y_evidence)],
-                                      color='orange', alpha=0.5, linewidth=4)
+                                      color='red', alpha=1.0, linewidth=1)
 
                 axes[jy, ix].axis([bounds[ix][0], bounds[ix][1],
                                    min(y_evidence), max(y_evidence)])
             elif ix < jy:
@@ -520,20 +466,22 @@
                 z = gp.predict_mean(predictors).reshape(resol, resol)
 
                 axes[jy, ix].contourf(x, y, z, cmap=cmap)
-                axes[jy, ix].scatter(x_evidence[:, ix], x_evidence[:, jy], color="red", alpha=0.1)
-                axes[jy, ix].set_aspect(aspect=(bounds[ix][1] - bounds[ix][0]) /
-                                        (bounds[jy][1] - bounds[jy][0]))
+                axes[jy, ix].scatter(x_evidence[:, ix],
+                                     x_evidence[:, jy],
+                                     color="red",
+                                     alpha=0.7,
+                                     s=5)
 
                 if true_params is not None:
                     axes[jy, ix].plot([true_params[parameter_names[ix]],
                                        true_params[parameter_names[ix]]],
                                       [bounds[jy][0], bounds[jy][1]],
-                                      color='orange', alpha=0.5, linewidth=4)
+                                      color='red', alpha=1.0, linewidth=1)
 
                     axes[jy, ix].plot([bounds[ix][0], bounds[ix][1]],
                                       [true_params[parameter_names[jy]],
                                        true_params[parameter_names[jy]]],
-                                      color='orange', alpha=0.5, linewidth=4)
+                                      color='red', alpha=1.0, linewidth=1)
 
                 if ix == 0:
                     axes[jy, ix].set_ylabel(parameter_names[jy])
@@ -551,3 +499,132 @@
             axes[jy, ix].set_xlabel(parameter_names[ix])
 
     return axes
+
+
+def plot_predicted_summaries(model=None,
+                             summary_names=None,
+                             n_samples=100,
+                             seed=None,
+                             bins=20,
+                             axes=None,
+                             add_observed=True,
+                             draw_upper_triagonal=False,
+                             **kwargs):
+    """Plot pairwise relationships of 1D summary statistics from the prior predictive distribution.
+
+    Parameters
+    ----------
+    model : elfi.Model
+        Model which is explored.
+    summary_names : list of strings
+        Summary statistics which are pairplotted.
+    n_samples : int, optional
+        How many samples are drawn from the model.
+    seed : int, optional
+        Seed used when sampling from the model.
+    bins : int, optional
+        Number of bins in histograms.
+    axes : one or an iterable of plt.Axes, optional
+    add_observed : boolean, optional
+        Add the observed summary points to the pairplots.
+    draw_upper_triagonal : boolean, optional
+        Whether to also draw the symmetric upper-triangular part.
+
+    """
+    dot_size = kwargs.pop('s', 8)
+    samples = model.generate(batch_size=n_samples, outputs=summary_names, seed=seed)
+    reference_value = model.generate(with_values=model.observed, outputs=summary_names)
+    reference_value = reference_value if add_observed else None
+    plot_pairs(samples,
+               selector=None,
+               bins=bins,
+               axes=axes,
+               reference_value=reference_value,
+               s=dot_size,
+               draw_upper_triagonal=draw_upper_triagonal)
+
+
+class ProgressBar:
+    """Progress bar monitoring the inference process.
+
+    Attributes
+    ----------
+    prefix : str, optional
+        Prefix string
+    suffix : str, optional
+        Suffix string
+    decimals : int, optional
+        Positive number of decimals in percent complete
+    length : int, optional
+        Character length of bar
+    fill : str, optional
+        Bar fill character
+    scaling : int, optional
+        Integer used to scale current iteration and total iterations of the progress bar
+
+    """
+
+    def __init__(self, prefix='', suffix='', decimals=1, length=100, fill='='):
+        """Construct progressbar for monitoring.
+
+        Parameters
+        ----------
+        prefix : str, optional
+            Prefix string
+        suffix : str, optional
+            Suffix string
+        decimals : int, optional
+            Positive number of decimals in percent complete
+        length : int, optional
+            Character length of bar
+        fill : str, optional
+            Bar fill character
+
+        """
+        self.prefix = prefix
+        self.suffix = suffix
+        self.decimals = decimals
+        self.length = length
+        self.fill = fill
+        self.scaling = 0
+        self.finished = False
+
+    def update_progressbar(self, iteration, total):
+        """Print updated progress bar in console.
+
+        Parameters
+        ----------
+        iteration : int
+            Integer indicating completed iterations
+        total : int
+            Integer indicating total number of iterations
+
+        """
+        if iteration >= total:
+            percent = ("{0:." + str(self.decimals) + "f}").\
+                format(100.0)
+            bar = self.fill * self.length
+            if not self.finished:
+                print('%s [%s] %s%% %s' % (self.prefix, bar, percent, self.suffix))
+                self.finished = True
+        elif total - self.scaling > 0:
+            percent = ("{0:." + str(self.decimals) + "f}").\
+                format(100 * ((iteration - self.scaling) / float(total - self.scaling)))
+            filled_length = int(self.length * (iteration - self.scaling) // (total - self.scaling))
+            bar = self.fill * filled_length + '-' * (self.length - filled_length)
+            print('%s [%s] %s%% %s' % (self.prefix, bar, percent, self.suffix), end='\r')
+
+    def reinit_progressbar(self, scaling=0, reinit_msg=""):
+        """Reinitialize new round of progress bar.
+
+        Parameters
+        ----------
+        scaling : int, optional
+            Integer used to scale current and total iterations of the progress bar
+        reinit_msg : str, optional
+            Message printed before restarting an empty progress bar on a new line
+
+        """
+        self.scaling = scaling
+        self.finished = False
+        print(reinit_msg)
diff --git a/requirements-dev.txt b/requirements-dev.txt
index f0d315b4..d9983345 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -8,7 +8,7 @@ pytest-rerunfailures>=4.1
 # Linting
 flake8>=3.0.4
 flake8-docstrings>=1.0.2
-isort>=4.2.5
+isort>=4.2.5, <5.0.0
 flake8-isort>=2.0.1
 pydocstyle<5.0.0
 
diff --git a/requirements.txt b/requirements.txt
index b785af1e..129b32bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-dask[distributed]==2.30.0
+dask[distributed]>=2.30.0
 numpy>=1.12.1
 scipy>=0.19
 matplotlib>=1.1
diff --git a/tests/unit/test_testbench.py b/tests/unit/test_testbench.py
new file mode 100644
index 00000000..adf191d6
--- /dev/null
+++ b/tests/unit/test_testbench.py
@@ -0,0 +1,109 @@
+import numpy as np
+import pytest
+
+import elfi
+import elfi.examples.ma2 as exma2
+
+
+def test_testbenchmethod_init():
+
+    method = elfi.TestbenchMethod(method=elfi.SMC, name="SMC_1")
+    method.set_method_kwargs(discrepancy_name='d', batch_size=50)
+    method.set_sample_kwargs(n_samples=100, thresholds=[2.0, 1.0], bar=False)
+    attr = method.get_method()
+
+    assert attr['name'] == "SMC_1"
+    assert attr['method_kwargs']['batch_size'] == 50
+    assert attr['sample_kwargs']['n_samples'] == 100
+
+
+def test_testbench_init_param_reps(ma2):
+
+    testbench = elfi.Testbench(model=ma2,
+                               repetitions=5,
+                               seed=99,
+                               progress_bar=False)
+
+    for _, values in testbench.reference_parameter.items():
+        assert values.size == 5
+
+
+def test_testbench_init_given_params(ma2):
+
+    ref_params = ma2.generate(batch_size=1, outputs=['t1', 't2'])
+    testbench = elfi.Testbench(model=ma2,
+                               reference_parameter=ref_params,
+                               repetitions=5,
+                               seed=99,
+                               progress_bar=False)
+
+    for _, values in testbench.reference_parameter.items():
+        assert np.all(values == values[0])
+        assert values.size == 5
+
+
+def test_testbench_init_obs_reps(ma2):
+
+    testbench = elfi.Testbench(model=ma2,
+                               repetitions=5,
+                               seed=99,
+                               progress_bar=False)
+
+    assert len(testbench.observations) == 5
+
+
+def test_testbench_init_given_obs(ma2):
+
+    obs = ma2.generate(batch_size=1, outputs=['MA2'])
+    testbench = elfi.Testbench(model=ma2,
+                               observations=obs,
+                               repetitions=5,
+                               seed=99,
+                               progress_bar=False)
+
+    assert len(testbench.observations) == 5
+    assert np.all(
+        [a == b for a, b in zip([obs], testbench.observations)]
+    )
+
+
+def test_testbench_execution(ma2):
+
+    method1 = elfi.TestbenchMethod(method=elfi.Rejection, name='Rejection_1')
+    method1.set_method_kwargs(discrepancy_name='d', batch_size=500)
+    method1.set_sample_kwargs(n_samples=500, bar=False)
+
+    method2 = elfi.TestbenchMethod(method=elfi.Rejection, name='Rejection_2')
+    method2.set_method_kwargs(discrepancy_name='d', batch_size=500)
+    method2.set_sample_kwargs(n_samples=500, quantile=0.5, bar=False)
+
+    testbench = elfi.Testbench(model=ma2,
+                               repetitions=3,
+                               seed=156,
+                               progress_bar=False)
+    testbench.add_method(method1)
+    testbench.add_method(method2)
+
+    testbench.run()
+
+    sample_mean_differences = testbench.parameterwise_sample_mean_differences()
+    assert len(sample_mean_differences) == 2
+    assert len(sample_mean_differences['Rejection_1']) == 2
+    assert len(sample_mean_differences['Rejection_1']['t1']) == 3
+
+
+def test_testbench_seeding(ma2):
+
+    testbench1 = elfi.Testbench(model=ma2,
+                                repetitions=2,
+                                seed=100,
+                                progress_bar=False)
+
+    testbench2 = elfi.Testbench(model=ma2,
+                                repetitions=2,
+                                seed=100,
+                                progress_bar=False)
+
+    assert len(testbench1.observations) == len(testbench2.observations)
+    assert np.all(
+        [a == b for a, b in zip(testbench1.observations, testbench2.observations)]
+    )
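
Example usage of the new testbench API — a minimal sketch mirroring the unit tests above; it assumes the bundled MA2 example model from elfi.examples.ma2, whose discrepancy node is named 'd':

    import elfi
    from elfi.examples import ma2

    model = ma2.get_model(seed_obs=1)

    # Wrap two rejection samplers that differ only in their acceptance rule.
    method1 = elfi.TestbenchMethod(method=elfi.Rejection, name='Rejection_1')
    method1.set_method_kwargs(discrepancy_name='d', batch_size=500)
    method1.set_sample_kwargs(n_samples=500, bar=False)

    method2 = elfi.TestbenchMethod(method=elfi.Rejection, name='Rejection_2')
    method2.set_method_kwargs(discrepancy_name='d', batch_size=500)
    method2.set_sample_kwargs(n_samples=500, quantile=0.5, bar=False)

    # Run each method three times on synthetic observations generated from
    # the model; a fixed seed makes the comparison reproducible.
    testbench = elfi.Testbench(model=model, repetitions=3, seed=156)
    testbench.add_method(method1)
    testbench.add_method(method2)
    testbench.run()

    results = testbench.get_testbench_results()
    differences = testbench.parameterwise_sample_mean_differences()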