Skip to content

Commit

Permalink
Merge pull request #15 from JuBiotech/pymc-v4-compat
Browse files Browse the repository at this point in the history
Refactor for PyMC `4.0.0b2` compatibility of `bletl.growth`
  • Loading branch information
michaelosthege authored Feb 25, 2022
2 parents 57a384b + 6ded5ba commit ad4d5b9
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 52 deletions.
14 changes: 10 additions & 4 deletions .github/workflows/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
name: pipeline

on: [push, pull_request]
on: [push]

jobs:
test-job:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
pymc-version: ["without", "pymc==4.0.0b2", "pymc3"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -17,19 +18,24 @@ jobs:
- name: Install dependencies
run: |
pip install -e .
pip install flake8 pytest pytest-cov codecov wheel calibr8 pymc3
pip install -r requirements-dev.txt
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings
flake8 . --count --exit-zero --statistics
- name: Test with pytest
- name: Test without PyMC
if: matrix.pymc-version == 'without'
run: |
pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
- name: Install and test with PyMC
if: matrix.pymc-version != 'without'
run: |
pip install ${{ matrix.pymc-version }}
pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
- name: Upload coverage
uses: codecov/codecov-action@v1
if: matrix.python-version == 3.8
with:
file: ./coverage.xml
- name: Test Wheel install and import
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Install dependencies
run: |
pip install -e .
pip install flake8 pytest pytest-cov twine wheel calibr8 pymc3
pip install -r requirements-dev.txt
- name: Test with pytest
run: |
pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
Expand Down
2 changes: 1 addition & 1 deletion bletl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
from . splines import get_crossvalidated_spline


__version__ = '1.0.5'
__version__ = '1.1.0'
113 changes: 74 additions & 39 deletions bletl/growth.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import logging
import numpy
from packaging import version
import scipy.stats
import typing

import arviz
import pymc3
import theano.tensor as tt

import calibr8
from calibr8.utils import at, pm

# Use the new ConstantData container if available,
# because it gives superior computational performance.
if hasattr(pm, "ConstantData"):
pmData = pm.ConstantData
else:
pmData = pm.Data


_log = logging.getLogger(__file__)

Expand All @@ -22,7 +30,7 @@ def __init__(
y:numpy.ndarray,
calibration_model: calibr8.CalibrationModel,
switchpoints:typing.Dict[float, str],
pmodel:pymc3.Model,
pmodel:pm.Model,
theta_map:dict,
):
""" Creates a result object of a growth rate analysis.
Expand All @@ -37,10 +45,10 @@ def __init__(
backscatter vector
switchpoints : dict
maps switchpoint times to labels
pmodel : pymc3.Model
the PyMC3 model underlying the analysis
pmodel : pymc.Model
the PyMC model underlying the analysis
theta_map : dict
the PyMC3 MAP estimate
the PyMC MAP estimate
"""
self._t_data = t_data
self._t_segments = t_segments
Expand Down Expand Up @@ -91,8 +99,8 @@ def detected_switchpoints(self) -> typing.Tuple[float]:
)

@property
def pmodel(self) -> pymc3.Model:
""" The PyMC3 model underlying this analysis. """
def pmodel(self) -> pm.Model:
""" The PyMC model underlying this analysis. """
return self._pmodel

@property
Expand Down Expand Up @@ -135,7 +143,7 @@ def sample(self, **kwargs) -> None:
Parameters
----------
**sample_kwargs
optional keyword-arguments to pymc3.sample(...) to override defaults
optional keyword-arguments to pymc.sample(...) to override defaults
"""
sample_kwargs = dict(
return_inferencedata=True,
Expand All @@ -147,15 +155,18 @@ def sample(self, **kwargs) -> None:
)
sample_kwargs.update(kwargs)
with self.pmodel:
self._idata = pymc3.sample(**sample_kwargs)
self._idata = pm.sample(**sample_kwargs)
return


def _make_random_walk(name:str, *, mu: float=0, sigma:float, nu: float=1, length:int, student_t:bool, initval: numpy.ndarray=None):
""" Create a random walk with either a Normal or Student-t distribution.
Parameteres
-----------
For some PyMC versions and for Student-t distributed random walks,
the distribution is created from a cumsum of a N-dimensional random variable.
Parameters
----------
name : str
Name of the random walk variable.
mu : float, array-like
Expand All @@ -170,29 +181,54 @@ def _make_random_walk(name:str, *, mu: float=0, sigma:float, nu: float=1, length
length : int
Number of steps in the random walk.
student_t : bool
If `True` a `pymc3.Deterministic` of a StudentT-random walk is created.
If `True` a `pymc.Deterministic` of a StudentT-random walk is created.
Otherwise a `GaussianRandomWalk` is created.
initval : numpy.ndarray
Initial values for the RandomWalk variable.
If set, PyMC3 uses these values as start points for MAP optimization and MCMC sampling.
If set, PyMC uses these values as start points for MAP optimization and MCMC sampling.
Returns
-------
random_walk : TensorVariable
The tensor variable of the random walk.
"""
if student_t:
# a random walk of length N is just the cumulative sum over a N-dimensional random variable:
return pymc3.Deterministic(name, tt.cumsum(
pymc3.StudentT(
f'{name}__diff_',
mu=mu, sd=sigma, nu=nu,
shape=(length,),
testval=numpy.diff(initval, prepend=0) if initval is not None else initval
)
))
pmversion = version.parse(pm.__version__)

# Adapt to rename of the testval→initval kwarg
if pmversion <= version.parse("3.11.4"):
initval_kwarg = "testval"
else:
initval_kwarg = "initval"

if pmversion < version.parse("4.0.0b1") and not student_t:
# Use the gaussian random walk distribution directly.
return pm.GaussianRandomWalk(**{
"name": name,
"mu": mu,
"sigma": sigma,
"shape": (length,),
initval_kwarg: initval,
})
else:
return pymc3.GaussianRandomWalk(name, mu=mu, sigma=sigma, shape=(length,), testval=initval)
# Create the random walk manually.
rv_kwargs = {
"name": f"{name}__diff_",
"mu": mu,
"sigma": sigma,
"shape": (length,),
# Since the initval refers to the random walk, but we're creating it
# using the cumsum of an RV, we need to do numpy.diff to get an initial
# value for the RV from the initial value of the random walk.
initval_kwarg: numpy.diff(initval, prepend=0) if initval is not None else None,
}

if student_t:
rv_cls = pm.StudentT
rv_kwargs["nu"] = nu
else:
rv_cls = pm.Normal

return pm.Deterministic(name, at.cumsum(rv_cls(**rv_kwargs)))


def _get_smoothed_mu(t: numpy.ndarray, y: numpy.ndarray, cm_cdw: calibr8.CalibrationModel, *, clip=0.5) -> numpy.ndarray:
Expand Down Expand Up @@ -245,7 +281,6 @@ def fit_mu_t(
switchpoints:typing.Optional[typing.Union[typing.Sequence[float], typing.Dict[float, str]]]=None,
mcmc_samples:int=0,
mu_prior:float=0,
σ:float=None,
drift_scale:float,
nu:float=5,
x0_prior:float=0.25,
Expand Down Expand Up @@ -325,16 +360,16 @@ def fit_mu_t(
# Override guess with user-provided mu_prior for nonzero starting points.
mu_guess[mu_prior != 0] = mu_prior[mu_prior != 0]

# build PyMC3 model
# build PyMC model
coords = {
"timepoint": numpy.arange(TD),
"segment": numpy.arange(TS),
}
with pymc3.Model(coords=coords) as pmodel:
pymc3.Data('known_switchpoints', t_switchpoints_known)
pymc3.Data('t_data', t_data, dims="timepoint")
pymc3.Data('t_segments', t_segments, dims="segment")
dt = pymc3.Data('dt', numpy.diff(t_data), dims="segment")
with pm.Model(coords=coords) as pmodel:
pmData('known_switchpoints', t_switchpoints_known)
pmData('t_data', t_data, dims="timepoint")
pmData('t_segments', t_segments, dims="segment")
dt = pmData('dt', numpy.diff(t_data), dims="segment")

if len(t_switchpoints_known) > 0:
_log.info('Creating model with %d switchpoints. StudentT=%b', len(t_switchpoints_known), student_t)
Expand All @@ -359,30 +394,30 @@ def fit_mu_t(
mu_segments.append(
_make_random_walk(name, mu_prior[slc], sigma=drift_scale, nu=nu, length=i_len, student_t=student_t, initval=mu_guess[slc])
)
mu_t = pymc3.Deterministic('mu_t', tt.concatenate(mu_segments), dims="segment")
mu_t = pm.Deterministic('mu_t', at.concatenate(mu_segments), dims="segment")
else:
_log.info('Creating model without switchpoints. StudentT=%b', len(t_switchpoints_known), student_t)
mu_t = _make_random_walk('mu_t', mu=mu_prior, sigma=drift_scale, nu=nu, length=TS, student_t=student_t, initval=mu_guess)

X0 = pymc3.Lognormal('X0', mu=numpy.log(x0_prior), sd=1)
Xt = pymc3.Deterministic(
X0 = pm.Lognormal('X0', mu=numpy.log(x0_prior), sd=1)
Xt = pm.Deterministic(
'X',
tt.concatenate([
at.concatenate([
X0[None],
X0 * pymc3.math.exp(tt.extra_ops.cumsum(mu_t * dt))
X0 * pm.math.exp(at.extra_ops.cumsum(mu_t * dt))
]),
dims="timepoint",
)
calibration_model.loglikelihood(
x=Xt,
y=pymc3.Data('backscatter', y, dims=('timepoint',)),
y=pmData('backscatter', y, dims=('timepoint',)),
replicate_id=replicate_id,
dependent_key=calibration_model.dependent_key
)

# MAP fit
with pmodel:
theta_map = pymc3.find_MAP(maxeval=15_000)
theta_map = pm.find_MAP(maxeval=15_000)

# with StudentT random walks, switchpoints can be autodetected
if student_t:
Expand Down
7 changes: 7 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
calibr8
codecov
flake8
pytest
pytest-cov
twine
wheel
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_version():
zip_safe=False,
version=__version__,
description='Package for parsing and transforming BioLector raw data.',
url='https://jugit.fz-juelich.de/IBG-1/biopro/bletl',
url='https://github.com/jubiotech/bletl',
author='Michael Osthege',
author_email='[email protected]',
license='GNU Affero General Public License v3.0',
Expand Down
12 changes: 6 additions & 6 deletions tests/test_growth.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
try:
import bletl.growth
import calibr8
import pymc3
from theano.tensor import TensorVariable
from calibr8.utils import at, pm

HAS_DEPENDENCIES = True
except ImportError:
Expand Down Expand Up @@ -52,24 +51,24 @@ def biomass_curve():
@pytest.mark.skipif(not HAS_DEPENDENCIES, reason="Needs optional dependencies.")
class TestGrowthHelpers:
def test_make_gaussian_random_walk(self):
with pymc3.Model() as pmodel:
with pm.Model() as pmodel:
rv = bletl.growth._make_random_walk(
"testGRW",
sigma=0.02,
length=20,
student_t=False,
)
assert isinstance(rv, TensorVariable)
assert isinstance(rv, at.TensorVariable)

def test_make_studentt_random_walk(self):
with pymc3.Model() as pmodel:
with pm.Model() as pmodel:
rv = bletl.growth._make_random_walk(
"testSTRW",
sigma=0.02,
length=20,
student_t=True,
)
assert isinstance(rv, TensorVariable)
assert isinstance(rv, at.TensorVariable)
pass

def test_get_smoothed_mu(self, biomass_curve, biomass_calibration):
Expand All @@ -86,6 +85,7 @@ def test_get_smoothed_mu(self, biomass_curve, biomass_calibration):
pass


@pytest.mark.skipif(not HAS_DEPENDENCIES, reason="Needs optional dependencies.")
class TestRandomWalkModel:
def test_fit_mu_t_gaussian(self, biomass_curve, biomass_calibration):
t, X, mu_true = biomass_curve
Expand Down

0 comments on commit ad4d5b9

Please sign in to comment.