From 3abcbcca51052958172b69a347cdd94c245d96d5 Mon Sep 17 00:00:00 2001
From: Gregor Boehl <admin@gregorboehl.com>
Date: Thu, 6 Jun 2024 14:43:04 +0200
Subject: [PATCH] Revert "revert to mvt from scipy"

This reverts commit d2b7f679dee6feb094d58578dc6ed1d044e70340.
---
 dime_sampler/moves.py                | 100 ++++++++++++++++-----------
 dime_sampler/test_storage/median.npy | Bin 136 -> 136 bytes
 2 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/dime_sampler/moves.py b/dime_sampler/moves.py
index 7092378..43e7986 100644
--- a/dime_sampler/moves.py
+++ b/dime_sampler/moves.py
@@ -7,6 +7,25 @@
 from scipy.stats import multivariate_t
 
 
+def mvt_sample(df, mean, cov, size, random):
+    """Sample from multivariate t distribution
+
+    For reasons beyond my understanding, the results from random.multivariate_normal with non-identity covariance matrix are not reproducibel across architecture for given random seeds. Since scipy.stats.multivariate_t is based on numpy's multivariate_normal, the workaround is to crochet this manually. Advantage is that the scipy dependency drops out.
+    """
+
+    dim = len(mean)
+
+    # draw samples
+    snorm = random.randn(size, dim)
+    chi2 = random.chisquare(df, size) / df
+
+    # calculate sqrt of covariance
+    svd_cov = np.linalg.svd(cov * (df - 2) / df)
+    sqrt_cov = svd_cov[0] * np.sqrt(svd_cov[1]) @ svd_cov[2]
+
+    return mean + snorm @ sqrt_cov / np.sqrt(chi2)[:, None]
+
+
 class DIMEMove(RedBlueMove):
     r"""A proposal using adaptive differential-independence mixture ensemble MCMC.
 
@@ -19,9 +38,9 @@ class DIMEMove(RedBlueMove):
     gamma : float, optional
         mean stretch factor for the proposal vector. By default, it is :math:`2.38 / \sqrt{2\,\mathrm{ndim}}` as recommended by `ter Braak (2006) <http://www.stat.columbia.edu/~gelman/stuff_for_blog/cajo.pdf>`_.
     aimh_prob : float, optional
-        probability to draw an global transition kernel. By default this is set to :math:`0.1`.
+        probability to draw an adaptive independence Metropolis Hastings (AIMH) proposal. By default this is set to :math:`0.1`.
     df_proposal_dist : float, optional
-        degrees of freedom of the multivariate t distribution used for global kernel. Defaults to :math:`10`.
+        degrees of freedom of the multivariate t distribution used for AIMH proposals. Defaults to :math:`10`.
     rho : float, optional
         decay parameter for the aimh proposal mean and covariances. Defaults to :math:`0.999`.
     """
@@ -51,10 +70,10 @@ def setup(self, coords):
             # even more MAGIC
             self.prop_cov = np.eye(npar)
             self.prop_mean = np.zeros(npar)
+            self.accepted = np.ones(nchain, dtype=bool)
             self.cumlweight = -np.inf
-            self.update_proposal_dist(None)
         else:
-            # update global kernel proposal distribution
+            # update AIMH proposal distribution
             self.update_proposal_dist(coords)
 
     def propose(self, model, state):
@@ -70,39 +89,30 @@ def update_proposal_dist(self, x):
         """Update proposal distribution with ensemble `x`
         """
 
-        if x is not None:
-            # calculate global kernel parameters
-            nchain, npar = x.shape
-
-            # log weight of current ensemble
-            if self.accepted.any():
-                lweight = logsumexp(self.lprobs) + \
-                    np.log(sum(self.accepted)) - np.log(nchain)
-            else:
-                lweight = -np.inf
-
-            # calculate stats for current ensemble
-            ncov = np.cov(x.T, ddof=1)
-            nmean = np.mean(x, axis=0)
-
-            # update global kernel proposal distribution
-            newcumlweight = np.logaddexp(self.cumlweight, lweight)
-            self.prop_cov = (
-                np.exp(self.cumlweight - newcumlweight) * self.prop_cov
-                + np.exp(lweight - newcumlweight) * ncov
-            )
-            self.prop_mean = (
-                np.exp(self.cumlweight - newcumlweight) * self.prop_mean
-                + np.exp(lweight - newcumlweight) * nmean
-            )
-            self.cumlweight = newcumlweight + np.log(self.decay)
-
-        # create frozen distribution 
-        self.mvt = multivariate_t(
-            self.prop_mean,
-            self.prop_cov * (self.dft - 2) / self.dft,
-            df=self.dft
+        nchain, npar = x.shape
+
+        # log weight of current ensemble
+        if self.accepted.any():
+            lweight = logsumexp(self.lprobs) + \
+                np.log(sum(self.accepted)) - np.log(nchain)
+        else:
+            lweight = -np.inf
+
+        # calculate stats for current ensemble
+        ncov = np.cov(x.T, ddof=1)
+        nmean = np.mean(x, axis=0)
+
+        # update AIMH proposal distribution
+        newcumlweight = np.logaddexp(self.cumlweight, lweight)
+        self.prop_cov = (
+            np.exp(self.cumlweight - newcumlweight) * self.prop_cov
+            + np.exp(lweight - newcumlweight) * ncov
+        )
+        self.prop_mean = (
+            np.exp(self.cumlweight - newcumlweight) * self.prop_mean
+            + np.exp(lweight - newcumlweight) * nmean
         )
+        self.cumlweight = newcumlweight + np.log(self.decay)
 
     def get_proposal(self, x, xref, random):
         """Actual proposal function
@@ -121,13 +131,23 @@ def get_proposal(self, x, xref, random):
         q = x + self.g0 * (xref[i0 % nref] - xref[i1 % nref]) + f[:, None]
         factors = np.zeros(nchain, dtype=np.float64)
 
-        # draw chains for global transition kernel
+        # draw chains for AIMH sampling
         xchnge = random.rand(nchain) <= self.aimh_prob
 
         # draw alternative candidates and calculate their proposal density
-        xcand = self.mvt.rvs(size=sum(xchnge), random_state=random)
-        lprop_old = self.mvt.logpdf(x[xchnge])
-        lprop_new = self.mvt.logpdf(xcand)
+        xcand = mvt_sample(
+            df=self.dft,
+            mean=self.prop_mean,
+            cov=self.prop_cov,
+            size=sum(xchnge),
+            random=random,
+        )
+        lprop_old, lprop_new = multivariate_t.logpdf(
+            np.vstack((x[None, xchnge], xcand[None])),
+            self.prop_mean,
+            self.prop_cov * (self.dft - 2) / self.dft,
+            df=self.dft,
+        )
 
         # update proposals and factors
         q[xchnge, :] = np.reshape(xcand, (-1, npar))
diff --git a/dime_sampler/test_storage/median.npy b/dime_sampler/test_storage/median.npy
index d8b2aa1405d4ebb5f2e9d2733a04cffa43874f5d..dea4d911f8ccd02e2baa77e8b6b181e2c3856b6b 100644
GIT binary patch
delta 15
WcmeBR>|mVGz`+>m`BL-RZ+ieJdIjtN

delta 15
WcmeBR>|mVGz!4ZG)OqI3PkR6=p$1(5