diff --git a/bin/opusfilter-autogen b/bin/opusfilter-autogen
index 9124170..a478f99 100644
--- a/bin/opusfilter-autogen
+++ b/bin/opusfilter-autogen
@@ -43,9 +43,9 @@ parser.add_argument('--clusters', '-k', default=2, type=int, metavar='INT',
 parser.add_argument('--work-dir', default='work',
                     help='Location of the source and target files for the generated configuration (default %(default)s)')
 parser.add_argument('--inter-dir', help='Save intermediate files in this directory (use a temporary directory if not given)')
-parser.add_argument('--plot', action='store_true',
-                    help=('Show a scatter plot of the clustering and histograms of feature data distributions; '
-                          'only for the clustering method'))
+parser.add_argument('--plot', metavar='PATH', default=None, type=str,
+                    help=('Create histograms of feature data distributions and a scatter plot of the clustering; '
+                          'give path to plot the PDF files to, or "-" for interactive plots; only for the clustering method'))
 parser.add_argument('--list-defaults', action='store_true', help='List default filters of the method to the output and quit')
 parser.add_argument('--add-filter', nargs=2, action='append', default=[], metavar=('CLASS', 'JSON'),
                     help=('Instead of using default filters, add a filter of CLASS with JSON parameters object '
@@ -78,9 +78,12 @@ if args.list_defaults:
 
 filters = filtergen.set_filter_thresholds()
 
-if args.method == 'clustering' and args.plot:
-    filtergen.scoredata.plot(plt)
-    plt.show()
+if args.method == 'clustering' and args.plot is not None:
+    if args.plot == '-':
+        filtergen.scoredata.plot(plt)
+        plt.show()
+    else:
+        filtergen.scoredata.plot(plt, path=args.plot)
 
 generator = ConfigurationGenerator(
     files=[os.path.abspath(f) for f in args.files], langs=args.langs, workdir=args.work_dir)
diff --git a/opusfilter/autogen_cluster.py b/opusfilter/autogen_cluster.py
index b30f4f3..2faf19e 100644
--- a/opusfilter/autogen_cluster.py
+++ b/opusfilter/autogen_cluster.py
@@ -1,13 +1,17 @@
 """Unsupervised threshold selection for filters"""
 
 from collections import Counter
+import itertools
 import logging
+import os
 
 import pandas as pd
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.cluster import KMeans
-from sklearn import preprocessing, random_projection
+from sklearn import decomposition, preprocessing, random_projection
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.inspection import permutation_importance
+from sklearn.utils.validation import check_is_fitted
 import numpy as np
 
 from . import CLEAN_LOW
@@ -18,6 +22,147 @@
 logger = logging.getLogger(__name__)
 
 
+class ArcProjection(BaseEstimator, TransformerMixin):
+    """Project data to two dimensions using evenly distributed unit vectors
+
+    The assigment of unit vectors to the original vectors is optimized
+    so that L2 norm between the correlation coefficients of the
+    original features and dot product of the vectors is minimized.
+
+    Parameters
+    ----------
+
+    arc : float, 'auto' or 'full', default='auto'
+        Length of the arc used for projection vectors. For 'auto', the
+        length is set as the arccosine of the minimum correlation of
+        the input features. For 'full', full circle (2 * pi) is used.
+
+    Attributes
+    ----------
+    n_components_ : int
+        Number of components, always 2.
+
+    components_ : ndarray of shape (n_components, n_features)
+        Matrix used for the projection.
+
+    """
+
+    def __init__(self, arc='auto'):
+        self.n_components_ = 2
+        self.arc = arc
+
+    def _best_permutation(self, current, n_features, eval_func):
+        """Greedy search for best permutation of projection matrix"""
+        permutation = np.arange(n_features)
+        while True:
+            logger.debug("current permutation: %s", permutation)
+            minpair, mincost = self._best_single_swap(current, eval_func)
+            logger.debug("best swap: %s (%s)", minpair, mincost)
+            if minpair is None:
+                return permutation, current, mincost
+            fwd, rev = list(minpair), list(reversed(minpair))
+            current[fwd] = current[rev]
+            permutation[fwd] = permutation[rev]
+
+    @staticmethod
+    def _best_single_swap(matrix, eval_func):
+        """Find best swap of matrix rows based in eval_func"""
+        mincost = eval_func(matrix)
+        minpair = None
+        for pair in itertools.combinations(range(matrix.shape[0]), 2):
+            fwd, rev = list(pair), list(reversed(pair))
+            matrix[fwd] = matrix[rev]
+            cost = eval_func(matrix)
+            if cost < mincost:
+                mincost = cost
+                minpair = pair
+            matrix[fwd] = matrix[rev]
+        return minpair, mincost
+
+    def _make_matrix(self, corrmat, n_features):
+        """Generate the random projection matrix.
+
+        Parameters
+        ----------
+        corrmat : ndarray of shape (n_features, n_features),
+            Correlation matrix for the original features
+
+        n_features : int,
+            Dimensionality of the original source space.
+
+        Returns
+        -------
+        components : ndarray of shape (n_components, n_features)
+            The generated random matrix.
+        """
+        def costf(matrix):
+            return ((corrmat - matrix @ matrix.T)**2).sum()
+
+        if self.arc == 'full':
+            dist = 2 * np.pi / n_features
+        elif self.arc == 'auto':
+            dist = np.arccos(corrmat.min()) / (n_features - 1)
+        else:
+            dist = self.arc / (n_features - 1)
+        matrix = np.array([
+            [np.cos(idx * dist), np.sin(idx * dist)] for idx in range(n_features)
+        ])
+        _, best_m, best_cost  = self._best_permutation(matrix.copy(), n_features, costf)
+        logger.debug("fcorr:\n%s", corrmat.round(3))
+        logger.debug("vdist:\n%s", (best_m @ best_m.T).round(3))
+        logger.debug("diff:\n%s", (corrmat - best_m @ best_m.T).round(3))
+        logger.debug("cost: %s", best_cost)
+        return best_m.T
+
+    def fit(self, X: np.array, y=None):
+        """Generate a projection matrix.
+
+        Parameters
+        ----------
+        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
+            Training set: only the shape is used to find matrix dimensions.
+
+        y : Ignored
+            Not used, present here for API consistency by convention.
+
+        Returns
+        -------
+        self : object
+            BaseRandomProjection class instance.
+        """
+        X = self._validate_data(
+            X, accept_sparse=["csr", "csc"], dtype=[np.float64, np.float32]
+        )
+
+        n_features = X.shape[1]
+        corrmat = np.corrcoef(X.T)
+        self.components_ = self._make_matrix(
+            corrmat, n_features
+        ).astype(X.dtype, copy=False)
+
+        return self
+
+    def transform(self, X):
+        """Project the data by using matrix product with the random matrix.
+
+        Parameters
+        ----------
+        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
+            The input data to project into a smaller dimensional space.
+
+        Returns
+        -------
+        X_new : ndarray of shape (n_samples, n_components)
+            Projected array.
+        """
+        check_is_fitted(self)
+        X = self._validate_data(
+            X, accept_sparse=["csr", "csc"], reset=False, dtype=[np.float64, np.float32]
+        )
+
+        return X @ self.components_.T
+
+
 class ScoreClusters:
     """Cluster segments by filter scores
 
@@ -42,6 +187,8 @@ def __init__(self, score_file, k=2):
         self.labels = self.kmeans.labels_
         self.cluster_centers = self.scaler.inverse_transform(self.kmeans.cluster_centers_) * self.direction_vector
         self._noisy_label = self._get_noisy_label()
+        self.rejects = None
+        self.thresholds = None
 
     @property
     def noisy_label(self):
@@ -119,17 +266,41 @@ def get_rejects(self):
 
     def get_result_df(self):
         """Return dataframe containing the thresholds and reject booleans"""
+        self.rejects = self.get_rejects()
+        self.thresholds = self.get_thresholds()
         return pd.DataFrame.from_dict(
-            {'name': self.get_columns(),
-             'threshold': self.get_thresholds(),
-             'reject': self.get_rejects()})
+            {'name': self.get_columns(), 'threshold': self.thresholds, 'reject': self.rejects})
 
-    def plot(self, plt):
+    def plot(self, plt, path=None, apply_rejects=True, projection='arc'):
         """Plot clustering and histograms"""
+        if projection == 'pca':
+            proj = decomposition.PCA(n_components=2)
+        elif projection == 'random':
+            proj = random_projection.GaussianRandomProjection(n_components=2)
+        elif projection == 'arc':
+            proj = ArcProjection()
+        else:
+            raise ValueError(f"Unknown projection: {projection}")
+        col_index = {col: idx for idx, col in enumerate(self.get_columns())}
+        index_col = dict(enumerate(self.get_columns()))
+        if apply_rejects and self.rejects:
+            indices = [idx for idx, reject in enumerate(self.rejects) if not reject]
+            cols = [index_col[idx] for idx in indices]
+            index_col = dict(enumerate(cols))
+            data_t = proj.fit_transform(self.standard_data[:, indices])
+            centroids = proj.transform(self.kmeans.cluster_centers_[:, indices])
+        else:
+            data_t = proj.fit_transform(self.standard_data)
+            centroids = proj.transform(self.kmeans.cluster_centers_)
+        plt.figure()
+        for idx in range(proj.components_.shape[1]):
+            plt.arrow(0, 0, proj.components_[0, idx], proj.components_[1, idx], head_width=0.01, fc='k')
+            plt.text(proj.components_[0, idx], proj.components_[1, idx], index_col[idx])
+        plt.gca().set_aspect('equal', adjustable='box')
+        plt.title('Projection vectors for clustering')
+        if path is not None:
+            plt.savefig(os.path.join(path, 'projection.pdf'))
         plt.figure(figsize=(10, 10))
-        projection = random_projection.GaussianRandomProjection(n_components=2)
-        data_t = projection.fit_transform(self.standard_data)
-        centroids = projection.transform(self.kmeans.cluster_centers_)
         for label_id in range(self.k):
             points = np.where(self.labels == label_id)
             plt.scatter(data_t[points, 0], data_t[points, 1],
@@ -141,10 +312,28 @@ def plot(self, plt):
                         marker='+', c='brown' if label_id == self.noisy_label else 'darkblue',
                         label='noisy centroid' if label_id == self.noisy_label else 'clean centroid')
         plt.legend()
-        plt.title('Clusters')
+        plt.title('Clustering')
+        if path is not None:
+            plt.savefig(os.path.join(path, 'clustering.pdf'))
         noisy_samples = self.df.iloc[np.where(self.labels == self.noisy_label)]
         clean_samples = self.df.iloc[np.where(self.labels != self.noisy_label)]
-        noisy_samples.hist(bins=100, figsize=(10, 10))
+        subplots_n = noisy_samples.hist(bins=100, figsize=(10, 10))
+        fig_noisy = plt.gcf()
         plt.suptitle('Histograms for noisy samples')
-        clean_samples.hist(bins=100, figsize=(10, 10))
+        subplots_c = clean_samples.hist(bins=100, figsize=(10, 10))
+        fig_clean = plt.gcf()
         plt.suptitle('Histograms for clean samples')
+        if apply_rejects and self.rejects:
+            for axes in ([axes for sublist in subplots_c for axes in sublist] +
+                         [axes for sublist in subplots_n for axes in sublist]):
+                title = axes.get_title()
+                if not title:
+                    continue
+                idx = col_index[title]
+                if self.rejects[idx]:
+                    axes.text(0.5, 1, 'REJECTED', horizontalalignment='center',
+                              verticalalignment='top', transform=axes.transAxes, color='r')
+                axes.axvline(self.thresholds[idx], color='k', linestyle='--', alpha=0.5)
+        if path is not None:
+            fig_clean.savefig(os.path.join(path, 'histogram_clean.pdf'))
+            fig_noisy.savefig(os.path.join(path, 'histogram_noisy.pdf'))