diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml
index a106129..1906bec 100644
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -1,6 +1,6 @@
 name: pipeline
 
-on: [push, pull_request]
+on: [push]
 
 jobs:
   test-job:
@@ -8,6 +8,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.7, 3.8, 3.9]
+        pymc-version: ["without", "pymc==4.0.0b2", "pymc3"]
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
@@ -17,19 +18,24 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e .
-        pip install flake8 pytest pytest-cov codecov wheel calibr8 pymc3
+        pip install -r requirements-dev.txt
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings
         flake8 . --count --exit-zero --statistics
-    - name: Test with pytest
+    - name: Test without PyMC
+      if: matrix.pymc-version == 'without'
       run: |
         pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
+    - name: Install and test with PyMC
+      if: matrix.pymc-version != 'without'
+      run: |
+        pip install ${{ matrix.pymc-version }}
+        pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
     - name: Upload coverage
       uses: codecov/codecov-action@v1
-      if: matrix.python-version == 3.8
       with:
         file: ./coverage.xml
     - name: Test Wheel install and import
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e6b4ca1..a0d6987 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,7 +20,7 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e .
-        pip install flake8 pytest pytest-cov twine wheel calibr8 pymc3
+        pip install -r requirements-dev.txt
     - name: Test with pytest
       run: |
         pytest --cov=./bletl --cov-report xml --cov-report term-missing tests/
diff --git a/bletl/__init__.py b/bletl/__init__.py
index 8b26086..135df7e 100644
--- a/bletl/__init__.py
+++ b/bletl/__init__.py
@@ -13,4 +13,4 @@
 from . splines import get_crossvalidated_spline
 
 
-__version__ = '1.0.5'
+__version__ = '1.1.0'
diff --git a/bletl/growth.py b/bletl/growth.py
index 9c24be1..2755541 100644
--- a/bletl/growth.py
+++ b/bletl/growth.py
@@ -1,13 +1,21 @@
 import logging
 import numpy
+from packaging import version
 import scipy.stats
 import typing
 
 import arviz
-import pymc3
-import theano.tensor as tt
 
 import calibr8
+from calibr8.utils import at, pm
+
+# Use the new ConstantData container if available,
+# because it gives superior computational performance.
+if hasattr(pm, "ConstantData"):
+    pmData = pm.ConstantData
+else:
+    pmData = pm.Data
+
+
 
 _log = logging.getLogger(__file__)
@@ -22,7 +30,7 @@ def __init__(
         y:numpy.ndarray,
         calibration_model: calibr8.CalibrationModel,
         switchpoints:typing.Dict[float, str],
-        pmodel:pymc3.Model,
+        pmodel:pm.Model,
         theta_map:dict,
     ):
         """ Creates a result object of a growth rate analysis.
@@ -37,10 +45,10 @@ def __init__(
             backscatter vector
         switchpoints : dict
             maps switchpoint times to labels
-        pmodel : pymc3.Model
-            the PyMC3 model underlying the analysis
+        pmodel : pymc.Model
+            the PyMC model underlying the analysis
         theta_map : dict
-            the PyMC3 MAP estimate
+            the PyMC MAP estimate
         """
         self._t_data = t_data
         self._t_segments = t_segments
@@ -91,8 +99,8 @@ def detected_switchpoints(self) -> typing.Tuple[float]:
         )
 
     @property
-    def pmodel(self) -> pymc3.Model:
-        """ The PyMC3 model underlying this analysis. """
+    def pmodel(self) -> pm.Model:
+        """ The PyMC model underlying this analysis. """
         return self._pmodel
 
     @property
@@ -135,7 +143,7 @@ def sample(self, **kwargs) -> None:
         Parameters
         ----------
         **sample_kwargs
-            optional keyword-arguments to pymc3.sample(...) to override defaults
+            optional keyword-arguments to pymc.sample(...) to override defaults
         """
         sample_kwargs = dict(
             return_inferencedata=True,
@@ -147,15 +155,18 @@
         )
         sample_kwargs.update(kwargs)
         with self.pmodel:
-            self._idata = pymc3.sample(**sample_kwargs)
+            self._idata = pm.sample(**sample_kwargs)
         return
 
 
def _make_random_walk(name:str, *, mu: float=0, sigma:float, nu: float=1, length:int, student_t:bool, initval: numpy.ndarray=None):
     """ Create a random walk with either a Normal or Student-t distribution.
 
-    Parameteres
-    -----------
+    For some PyMC versions and for Student-t distributed random walks,
+    the distribution is created from a cumsum of a N-dimensional random variable.
+
+    Parameters
+    ----------
     name : str
         Name of the random walk variable.
     mu : float, array-like
@@ -170,29 +181,54 @@ def _make_random_walk(name:str, *, mu: float=0, sigma:float, nu: float=1, length
     length : int
         Number of steps in the random walk.
     student_t : bool
-        If `True` a `pymc3.Deterministic` of a StudentT-random walk is created.
+        If `True` a `pymc.Deterministic` of a StudentT-random walk is created.
         Otherwise a `GaussianRandomWalk` is created.
     initval : numpy.ndarray
         Initial values for the RandomWalk variable.
-        If set, PyMC3 uses these values as start points for MAP optimization and MCMC sampling.
+        If set, PyMC uses these values as start points for MAP optimization and MCMC sampling.
 
     Returns
     -------
     random_walk : TensorVariable
         The tensor variable of the random walk.
     """
-    if student_t:
-        # a random walk of length N is just the cumulative sum over a N-dimensional random variable:
-        return pymc3.Deterministic(name, tt.cumsum(
-            pymc3.StudentT(
-                f'{name}__diff_',
-                mu=mu, sd=sigma, nu=nu,
-                shape=(length,),
-                testval=numpy.diff(initval, prepend=0) if initval is not None else initval
-            )
-        ))
+    pmversion = version.parse(pm.__version__)
+
+    # Adapt to rename of the testval→initval kwarg
+    if pmversion <= version.parse("3.11.4"):
+        initval_kwarg = "testval"
+    else:
+        initval_kwarg = "initval"
+
+    if pmversion < version.parse("4.0.0b1") and not student_t:
+        # Use the gaussian random walk distribution directly.
+        return pm.GaussianRandomWalk(**{
+            "name": name,
+            "mu": mu,
+            "sigma": sigma,
+            "shape": (length,),
+            initval_kwarg: initval,
+        })
     else:
-        return pymc3.GaussianRandomWalk(name, mu=mu, sigma=sigma, shape=(length,), testval=initval)
+        # Create the random walk manually.
+        rv_kwargs = {
+            "name": f"{name}__diff_",
+            "mu": mu,
+            "sigma": sigma,
+            "shape": (length,),
+            # Since the initval refers to the random walk, but we're creating it
+            # using the cumsum of an RV, we need to do numpy.diff to get an initial
+            # value for the RV from the initial value of the random walk.
+            initval_kwarg: numpy.diff(initval, prepend=0) if initval is not None else None,
+        }
+
+        if student_t:
+            rv_cls = pm.StudentT
+            rv_kwargs["nu"] = nu
+        else:
+            rv_cls = pm.Normal
+
+        return pm.Deterministic(name, at.cumsum(rv_cls(**rv_kwargs)))
 
 
 def _get_smoothed_mu(t: numpy.ndarray, y: numpy.ndarray, cm_cdw: calibr8.CalibrationModel, *, clip=0.5) -> numpy.ndarray:
@@ -245,7 +281,6 @@ def fit_mu_t(
     switchpoints:typing.Optional[typing.Union[typing.Sequence[float], typing.Dict[float, str]]]=None,
     mcmc_samples:int=0,
     mu_prior:float=0,
-    σ:float=None,
     drift_scale:float,
     nu:float=5,
     x0_prior:float=0.25,
@@ -325,16 +360,16 @@ def fit_mu_t(
     # Override guess with user-provided mu_prior for nonzero starting points.
     mu_guess[mu_prior != 0] = mu_prior[mu_prior != 0]
 
-    # build PyMC3 model
+    # build PyMC model
     coords = {
         "timepoint": numpy.arange(TD),
         "segment": numpy.arange(TS),
     }
-    with pymc3.Model(coords=coords) as pmodel:
-        pymc3.Data('known_switchpoints', t_switchpoints_known)
-        pymc3.Data('t_data', t_data, dims="timepoint")
-        pymc3.Data('t_segments', t_segments, dims="segment")
-        dt = pymc3.Data('dt', numpy.diff(t_data), dims="segment")
+    with pm.Model(coords=coords) as pmodel:
+        pmData('known_switchpoints', t_switchpoints_known)
+        pmData('t_data', t_data, dims="timepoint")
+        pmData('t_segments', t_segments, dims="segment")
+        dt = pmData('dt', numpy.diff(t_data), dims="segment")
 
         if len(t_switchpoints_known) > 0:
             _log.info('Creating model with %d switchpoints. StudentT=%b', len(t_switchpoints_known), student_t)
@@ -359,30 +394,30 @@ def fit_mu_t(
                 mu_segments.append(
                     _make_random_walk(name, mu_prior[slc], sigma=drift_scale, nu=nu, length=i_len, student_t=student_t, initval=mu_guess[slc])
                 )
-            mu_t = pymc3.Deterministic('mu_t', tt.concatenate(mu_segments), dims="segment")
+            mu_t = pm.Deterministic('mu_t', at.concatenate(mu_segments), dims="segment")
         else:
             _log.info('Creating model without switchpoints. StudentT=%b', len(t_switchpoints_known), student_t)
             mu_t = _make_random_walk('mu_t', mu=mu_prior, sigma=drift_scale, nu=nu, length=TS, student_t=student_t, initval=mu_guess)
 
-        X0 = pymc3.Lognormal('X0', mu=numpy.log(x0_prior), sd=1)
-        Xt = pymc3.Deterministic(
+        X0 = pm.Lognormal('X0', mu=numpy.log(x0_prior), sd=1)
+        Xt = pm.Deterministic(
             'X',
-            tt.concatenate([
+            at.concatenate([
                 X0[None],
-                X0 * pymc3.math.exp(tt.extra_ops.cumsum(mu_t * dt))
+                X0 * pm.math.exp(at.extra_ops.cumsum(mu_t * dt))
             ]),
             dims="timepoint",
         )
         calibration_model.loglikelihood(
             x=Xt,
-            y=pymc3.Data('backscatter', y, dims=('timepoint',)),
+            y=pmData('backscatter', y, dims=('timepoint',)),
             replicate_id=replicate_id,
             dependent_key=calibration_model.dependent_key
         )
 
     # MAP fit
     with pmodel:
-        theta_map = pymc3.find_MAP(maxeval=15_000)
+        theta_map = pm.find_MAP(maxeval=15_000)
 
     # with StudentT random walks, switchpoints can be autodetected
     if student_t:
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..35fdcfb
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,7 @@
+calibr8
+codecov
+flake8
+pytest
+pytest-cov
+twine
+wheel
diff --git a/setup.py b/setup.py
index 184bae2..d0e4ddc 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ def get_version():
     zip_safe=False,
     version=__version__,
     description='Package for parsing and transforming BioLector raw data.',
-    url='https://jugit.fz-juelich.de/IBG-1/biopro/bletl',
+    url='https://github.com/jubiotech/bletl',
     author='Michael Osthege',
     author_email='m.osthege@fz-juelich.de',
     license='GNU Affero General Public License v3.0',
diff --git a/tests/test_growth.py b/tests/test_growth.py
index 48ee6ec..6f99588 100644
--- a/tests/test_growth.py
+++ b/tests/test_growth.py
@@ -6,8 +6,7 @@
 try:
     import bletl.growth
     import calibr8
-    import pymc3
-    from theano.tensor import TensorVariable
+    from calibr8.utils import at, pm
 
     HAS_DEPENDENCIES = True
 except ImportError:
@@ -52,24 +51,24 @@ def biomass_curve():
 @pytest.mark.skipif(not HAS_DEPENDENCIES, reason="Needs optional dependencies.")
 class TestGrowthHelpers:
     def test_make_gaussian_random_walk(self):
-        with pymc3.Model() as pmodel:
+        with pm.Model() as pmodel:
             rv = bletl.growth._make_random_walk(
                 "testGRW",
                 sigma=0.02,
                 length=20,
                 student_t=False,
             )
-            assert isinstance(rv, TensorVariable)
+            assert isinstance(rv, at.TensorVariable)
 
     def test_make_studentt_random_walk(self):
-        with pymc3.Model() as pmodel:
+        with pm.Model() as pmodel:
             rv = bletl.growth._make_random_walk(
                 "testSTRW",
                 sigma=0.02,
                 length=20,
                 student_t=True,
             )
-            assert isinstance(rv, TensorVariable)
+            assert isinstance(rv, at.TensorVariable)
         pass
 
     def test_get_smoothed_mu(self, biomass_curve, biomass_calibration):
@@ -86,6 +85,7 @@ def test_get_smoothed_mu(self, biomass_curve, biomass_calibration):
         pass
 
 
+@pytest.mark.skipif(not HAS_DEPENDENCIES, reason="Needs optional dependencies.")
 class TestRandomWalkModel:
     def test_fit_mu_t_gaussian(self, biomass_curve, biomass_calibration):
         t, X, mu_true = biomass_curve
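
Illustrative sketch (not part of the patch): the heart of the `_make_random_walk` change above is a `packaging.version` dispatch that picks the right start-value keyword, because PyMC3 (<= 3.11.4) calls it `testval` while later PyMC releases renamed it to `initval`. The standalone snippet below reproduces only that dispatch; the helper name `pick_initval_kwarg` is hypothetical and nothing beyond the `packaging` library is needed to run it.

    from packaging import version

    def pick_initval_kwarg(pymc_version: str) -> str:
        """Return the name of the start-value kwarg for a given PyMC release string."""
        if version.parse(pymc_version) <= version.parse("3.11.4"):
            return "testval"  # PyMC3 naming
        return "initval"      # naming in newer PyMC releases

    # The two PyMC flavours exercised by the new CI matrix in pipeline.yml:
    for v in ("3.11.4", "4.0.0b2"):
        print(v, "->", pick_initval_kwarg(v))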