Merge pull request #15 from JuBiotech/pymc-v4-compat

Refactor for PyMC `4.0.0b2` compatibility of `bletl.growth`
JuBiotech · Feb 25, 2022 · ad4d5b9 · ad4d5b9
2 parents 57a384b + 6ded5ba
commit ad4d5b9
Show file tree

Hide file tree

Showing 7 changed files with 100 additions and 52 deletions.
diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml
@@ -1,13 +1,14 @@
 name: pipeline
 
-on: [push, pull_request]
+on: [push]
 
 jobs:
   test-job:
     runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: [3.7, 3.8, 3.9]
+        pymc-version: ["without", "pymc==4.0.0b2", "pymc3"]
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
@@ -17,19 +18,24 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e .
-        pip install flake8 pytest pytest-cov codecov wheel calibr8 pymc3
+        pip install -r requirements-dev.txt
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings
         flake8 . --count --exit-zero --statistics
-    - name: Test with pytest
+    - name: Test without PyMC
+      if: matrix.pymc-version == 'without'
       run: |
         pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
+    - name: Install and test with PyMC
+      if: matrix.pymc-version != 'without'
+      run: |
+        pip install ${{ matrix.pymc-version }}
+        pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
     - name: Upload coverage
       uses: codecov/codecov-action@v1
-      if: matrix.python-version == 3.8
       with:
         file: ./coverage.xml
     - name: Test Wheel install and import

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -20,7 +20,7 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e . 
-        pip install flake8 pytest pytest-cov twine wheel calibr8 pymc3
+        pip install -r requirements-dev.txt
     - name: Test with pytest
       run: |
         pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/

diff --git a/bletl/__init__.py b/bletl/__init__.py
@@ -13,4 +13,4 @@
 from . splines import get_crossvalidated_spline
 
 
-__version__ = '1.0.5'
+__version__ = '1.1.0'
diff --git a/bletl/growth.py b/bletl/growth.py
@@ -1,13 +1,21 @@
 import logging
 import numpy
+from packaging import version
 import scipy.stats
 import typing
 
 import arviz
-import pymc3
-import theano.tensor as tt
 
 import calibr8
+from calibr8.utils import at, pm
+
+# Use the new ConstantData container if available,
+# because it gives superior computational performance.
+if hasattr(pm, "ConstantData"):
+    pmData = pm.ConstantData
+else:
+    pmData = pm.Data
+
 
 _log = logging.getLogger(__file__)
 
@@ -22,7 +30,7 @@ def __init__(
         y:numpy.ndarray,
         calibration_model: calibr8.CalibrationModel,
         switchpoints:typing.Dict[float, str],
-        pmodel:pymc3.Model,
+        pmodel:pm.Model,
         theta_map:dict,
     ):
         """ Creates a result object of a growth rate analysis.
@@ -37,10 +45,10 @@ def __init__(
             backscatter vector
         switchpoints : dict
             maps switchpoint times to labels
-        pmodel : pymc3.Model
-            the PyMC3 model underlying the analysis
+        pmodel : pymc.Model
+            the PyMC model underlying the analysis
         theta_map : dict
-            the PyMC3 MAP estimate
+            the PyMC MAP estimate
         """
         self._t_data = t_data
         self._t_segments = t_segments
@@ -91,8 +99,8 @@ def detected_switchpoints(self) -> typing.Tuple[float]:
         )
 
     @property
-    def pmodel(self) -> pymc3.Model:
-        """ The PyMC3 model underlying this analysis. """
+    def pmodel(self) -> pm.Model:
+        """ The PyMC model underlying this analysis. """
         return self._pmodel
 
     @property
@@ -135,7 +143,7 @@ def sample(self, **kwargs) -> None:
         Parameters
         ----------
         **sample_kwargs
-            optional keyword-arguments to pymc3.sample(...) to override defaults
+            optional keyword-arguments to pymc.sample(...) to override defaults
         """
         sample_kwargs = dict(
             return_inferencedata=True,
@@ -147,15 +155,18 @@ def sample(self, **kwargs) -> None:
         )
         sample_kwargs.update(kwargs)
         with self.pmodel:
-            self._idata = pymc3.sample(**sample_kwargs)
+            self._idata = pm.sample(**sample_kwargs)
         return
 
 
 def _make_random_walk(name:str, *, mu: float=0, sigma:float, nu: float=1, length:int, student_t:bool, initval: numpy.ndarray=None):
     """ Create a random walk with either a Normal or Student-t distribution.
 
-    Parameteres
-    -----------
+    For some PyMC versions and for Student-t distributed random walks,
+    the distribution is created from a cumsum of a N-dimensional random variable.
+
+    Parameters
+    ----------
     name : str
         Name of the random walk variable.
     mu : float, array-like
@@ -170,29 +181,54 @@ def _make_random_walk(name:str, *, mu: float=0, sigma:float, nu: float=1, length
     length : int
         Number of steps in the random walk.
     student_t : bool
-        If `True` a `pymc3.Deterministic` of a StudentT-random walk is created.
+        If `True` a `pymc.Deterministic` of a StudentT-random walk is created.
         Otherwise a `GaussianRandomWalk` is created.
     initval : numpy.ndarray
         Initial values for the RandomWalk variable.
-        If set, PyMC3 uses these values as start points for MAP optimization and MCMC sampling.
+        If set, PyMC uses these values as start points for MAP optimization and MCMC sampling.
 
     Returns
     -------
     random_walk : TensorVariable
         The tensor variable of the random walk.
     """
-    if student_t:
-        # a random walk of length N is just the cumulative sum over a N-dimensional random variable:
-        return pymc3.Deterministic(name, tt.cumsum(
-            pymc3.StudentT(
-                f'{name}__diff_',
-                mu=mu, sd=sigma, nu=nu,
-                shape=(length,),
-                testval=numpy.diff(initval, prepend=0) if initval is not None else initval
-            )
-        ))
+    pmversion = version.parse(pm.__version__)
+
+    # Adapt to rename of the testval→initval kwarg
+    if pmversion <= version.parse("3.11.4"):
+        initval_kwarg = "testval"
+    else:
+        initval_kwarg = "initval"
+
+    if pmversion < version.parse("4.0.0b1") and not student_t:
+        # Use the gaussian random walk distribution directly.
+        return pm.GaussianRandomWalk(**{
+            "name": name,
+            "mu": mu,
+            "sigma": sigma,
+            "shape": (length,),
+            initval_kwarg: initval,
+        })
     else:
-        return pymc3.GaussianRandomWalk(name, mu=mu, sigma=sigma, shape=(length,), testval=initval)
+        # Create the random walk manually.
+        rv_kwargs = {
+            "name": f"{name}__diff_",
+            "mu": mu,
+            "sigma": sigma,
+            "shape": (length,),
+            # Since the initval refers to the random walk, but we're creating it
+            # using the cumsum of an RV, we need to do numpy.diff to get an initial
+            # value for the RV from the initial value of the random walk.
+            initval_kwarg: numpy.diff(initval, prepend=0) if initval is not None else None,
+        }
+
+        if student_t:
+            rv_cls = pm.StudentT
+            rv_kwargs["nu"] = nu
+        else:
+            rv_cls = pm.Normal
+
+        return pm.Deterministic(name, at.cumsum(rv_cls(**rv_kwargs)))
 
 
 def _get_smoothed_mu(t: numpy.ndarray, y: numpy.ndarray, cm_cdw: calibr8.CalibrationModel, *, clip=0.5) -> numpy.ndarray:
@@ -245,7 +281,6 @@ def fit_mu_t(
     switchpoints:typing.Optional[typing.Union[typing.Sequence[float], typing.Dict[float, str]]]=None,
     mcmc_samples:int=0,
     mu_prior:float=0,
-    σ:float=None,
     drift_scale:float,
     nu:float=5,
     x0_prior:float=0.25,
@@ -325,16 +360,16 @@ def fit_mu_t(
         # Override guess with user-provided mu_prior for nonzero starting points.
         mu_guess[mu_prior != 0] = mu_prior[mu_prior != 0]
 
-    # build PyMC3 model
+    # build PyMC model
     coords = {
         "timepoint": numpy.arange(TD),
         "segment": numpy.arange(TS),
     }
-    with pymc3.Model(coords=coords) as pmodel:
-        pymc3.Data('known_switchpoints', t_switchpoints_known)
-        pymc3.Data('t_data', t_data, dims="timepoint")
-        pymc3.Data('t_segments', t_segments, dims="segment")
-        dt = pymc3.Data('dt', numpy.diff(t_data), dims="segment")
+    with pm.Model(coords=coords) as pmodel:
+        pmData('known_switchpoints', t_switchpoints_known)
+        pmData('t_data', t_data, dims="timepoint")
+        pmData('t_segments', t_segments, dims="segment")
+        dt = pmData('dt', numpy.diff(t_data), dims="segment")
 
         if len(t_switchpoints_known) > 0:
             _log.info('Creating model with %d switchpoints. StudentT=%b', len(t_switchpoints_known), student_t)
@@ -359,30 +394,30 @@ def fit_mu_t(
             mu_segments.append(
                 _make_random_walk(name, mu_prior[slc], sigma=drift_scale, nu=nu, length=i_len, student_t=student_t, initval=mu_guess[slc])
             )
-            mu_t = pymc3.Deterministic('mu_t', tt.concatenate(mu_segments), dims="segment")
+            mu_t = pm.Deterministic('mu_t', at.concatenate(mu_segments), dims="segment")
         else:
             _log.info('Creating model without switchpoints. StudentT=%b', len(t_switchpoints_known), student_t)
             mu_t = _make_random_walk('mu_t', mu=mu_prior, sigma=drift_scale, nu=nu, length=TS, student_t=student_t, initval=mu_guess)
 
-        X0 = pymc3.Lognormal('X0', mu=numpy.log(x0_prior), sd=1)
-        Xt = pymc3.Deterministic(
+        X0 = pm.Lognormal('X0', mu=numpy.log(x0_prior), sd=1)
+        Xt = pm.Deterministic(
             'X',
-            tt.concatenate([
+            at.concatenate([
                 X0[None],
-                X0 * pymc3.math.exp(tt.extra_ops.cumsum(mu_t * dt))
+                X0 * pm.math.exp(at.extra_ops.cumsum(mu_t * dt))
             ]),
             dims="timepoint",
         )
         calibration_model.loglikelihood(
             x=Xt,
-            y=pymc3.Data('backscatter', y, dims=('timepoint',)),
+            y=pmData('backscatter', y, dims=('timepoint',)),
             replicate_id=replicate_id,
             dependent_key=calibration_model.dependent_key
         )
 
     # MAP fit
     with pmodel:
-        theta_map = pymc3.find_MAP(maxeval=15_000)
+        theta_map = pm.find_MAP(maxeval=15_000)
 
     # with StudentT random walks, switchpoints can be autodetected
     if student_t:

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -0,0 +1,7 @@
+calibr8
+codecov
+flake8
+pytest
+pytest-cov
+twine
+wheel
diff --git a/setup.py b/setup.py
@@ -25,7 +25,7 @@ def get_version():
         zip_safe=False,
         version=__version__,
         description='Package for parsing and transforming BioLector raw data.',
-        url='https://jugit.fz-juelich.de/IBG-1/biopro/bletl',
+        url='https://github.com/jubiotech/bletl',
         author='Michael Osthege',
         author_email='[email protected]',
         license='GNU Affero General Public License v3.0',

diff --git a/tests/test_growth.py b/tests/test_growth.py
@@ -6,8 +6,7 @@
 try:
     import bletl.growth
     import calibr8
-    import pymc3
-    from theano.tensor import TensorVariable
+    from calibr8.utils import at, pm
 
     HAS_DEPENDENCIES = True
 except ImportError:
@@ -52,24 +51,24 @@ def biomass_curve():
 @pytest.mark.skipif(not HAS_DEPENDENCIES, reason="Needs optional dependencies.")
 class TestGrowthHelpers:
     def test_make_gaussian_random_walk(self):
-        with pymc3.Model() as pmodel:
+        with pm.Model() as pmodel:
             rv = bletl.growth._make_random_walk(
                 "testGRW",
                 sigma=0.02,
                 length=20,
                 student_t=False,
             )
-            assert isinstance(rv, TensorVariable)
+            assert isinstance(rv, at.TensorVariable)
 
     def test_make_studentt_random_walk(self):
-        with pymc3.Model() as pmodel:
+        with pm.Model() as pmodel:
             rv = bletl.growth._make_random_walk(
                 "testSTRW",
                 sigma=0.02,
                 length=20,
                 student_t=True,
             )
-            assert isinstance(rv, TensorVariable)
+            assert isinstance(rv, at.TensorVariable)
         pass
 
     def test_get_smoothed_mu(self, biomass_curve, biomass_calibration):
@@ -86,6 +85,7 @@ def test_get_smoothed_mu(self, biomass_curve, biomass_calibration):
         pass
 
 
+@pytest.mark.skipif(not HAS_DEPENDENCIES, reason="Needs optional dependencies.")
 class TestRandomWalkModel:
     def test_fit_mu_t_gaussian(self, biomass_curve, biomass_calibration):
         t, X, mu_true = biomass_curve
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,4 +13,4 @@
		from . splines import get_crossvalidated_spline


		__version__ = '1.0.5'
		__version__ = '1.1.0'