From 35cc6e506c3627f5fdc2034892a85a98bc62cd63 Mon Sep 17 00:00:00 2001 From: zhengp0 Date: Tue, 25 Jun 2024 08:46:13 -0700 Subject: [PATCH 1/5] use ruff format files --- docs/source/conf.py | 47 +-- ruff.toml | 8 + src/mrtool/__init__.py | 7 +- src/mrtool/core/cov_model.py | 529 +++++++++++++++++--------- src/mrtool/core/data.py | 259 ++++++++----- src/mrtool/core/model.py | 465 ++++++++++++---------- src/mrtool/core/plots.py | 171 ++++++--- src/mrtool/core/utils.py | 175 +++++---- src/mrtool/cov_selection/covfinder.py | 198 ++++++---- tests/test_covmodel.py | 275 ++++++++----- tests/test_data.py | 260 +++++++------ tests/test_utils.py | 118 +++--- 12 files changed, 1541 insertions(+), 971 deletions(-) create mode 100644 ruff.toml diff --git a/docs/source/conf.py b/docs/source/conf.py index cd3d040..d1e8644 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -16,25 +16,26 @@ import sys import mrtool + base_dir = Path(mrtool.__file__).parent about = {} -with (base_dir / '__about__.py').open() as f: +with (base_dir / "__about__.py").open() as f: exec(f.read(), about) -sys.path.insert(0, Path('..').resolve()) +sys.path.insert(0, Path("..").resolve()) # -- Project information ----------------------------------------------------- -project = about['__title__'] +project = about["__title__"] copyright = f"2020, {about['__author__']}" -author = about['__author__'] +author = about["__author__"] # The short X.Y version. -version = about['__version__'] +version = about["__version__"] # The full version, including alpha/beta/rc tags. -release = about['__version__'] +release = about["__version__"] # -- General configuration --------------------------------------------------- @@ -42,26 +43,26 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
-needs_sphinx = '1.5' +needs_sphinx = "1.5" extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.doctest', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx_autodoc_typehints', - 'matplotlib.sphinxext.plot_directive', + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.doctest", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx_autodoc_typehints", + "matplotlib.sphinxext.plot_directive", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] -source_suffix = '.rst' -master_doc = 'index' +source_suffix = ".rst" +master_doc = "index" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -74,14 +75,14 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] html_css_files = [ - 'css/custom.css', + "css/custom.css", ] add_module_names = False diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..ecd7188 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,8 @@ +line-length = 80 +src = ["src"] + +[format] +docstring-code-format = true + +[lint.pydocstyle] +convention = "numpy" \ No newline at end of file diff --git a/src/mrtool/__init__.py b/src/mrtool/__init__.py index 6328c49..79307ec 100644 --- a/src/mrtool/__init__.py +++ b/src/mrtool/__init__.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- """ - mrtool - ~~~~~~ +mrtool +~~~~~~ - `mrtool` package. +`mrtool` package. """ + from .core.data import MRData from .core.cov_model import CovModel, LinearCovModel, LogCovModel from .core.model import MRBRT, MRBeRT diff --git a/src/mrtool/core/cov_model.py b/src/mrtool/core/cov_model.py index 8ba3ea7..665250f 100644 --- a/src/mrtool/core/cov_model.py +++ b/src/mrtool/core/cov_model.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- """ - cov_model - ~~~~~~~~~ +cov_model +~~~~~~~~~ - Covariates model for `mrtool`. +Covariates model for `mrtool`. """ + from typing import Tuple import numpy as np @@ -15,48 +16,49 @@ class CovModel: - """Covariates model. 
- """ - - def __init__(self, - alt_cov, - name=None, - ref_cov=None, - use_re=False, - use_re_mid_point=False, - use_spline=False, - use_spline_intercept=False, - spline_knots_type='frequency', - spline_knots=np.linspace(0.0, 1.0, 4), - spline_degree=3, - spline_l_linear=False, - spline_r_linear=False, - prior_spline_derval_gaussian=None, - prior_spline_derval_gaussian_domain=(0.0, 1.0), - prior_spline_derval_uniform=None, - prior_spline_derval_uniform_domain=(0.0, 1.0), - prior_spline_der2val_gaussian=None, - prior_spline_der2val_gaussian_domain=(0.0, 1.0), - prior_spline_der2val_uniform=None, - prior_spline_der2val_uniform_domain=(0.0, 1.0), - prior_spline_funval_gaussian=None, - prior_spline_funval_gaussian_domain=(0.0, 1.0), - prior_spline_funval_uniform=None, - prior_spline_funval_uniform_domain=(0.0, 1.0), - prior_spline_monotonicity=None, - prior_spline_monotonicity_domain=(0.0, 1.0), - prior_spline_convexity=None, - prior_spline_convexity_domain=(0.0, 1.0), - prior_spline_num_constraint_points=20, - prior_spline_maxder_gaussian=None, - prior_spline_maxder_uniform=None, - prior_spline_normalization=None, - prior_beta_gaussian=None, - prior_beta_uniform=None, - prior_beta_laplace=None, - prior_gamma_gaussian=None, - prior_gamma_uniform=None, - prior_gamma_laplace=None): + """Covariates model.""" + + def __init__( + self, + alt_cov, + name=None, + ref_cov=None, + use_re=False, + use_re_mid_point=False, + use_spline=False, + use_spline_intercept=False, + spline_knots_type="frequency", + spline_knots=np.linspace(0.0, 1.0, 4), + spline_degree=3, + spline_l_linear=False, + spline_r_linear=False, + prior_spline_derval_gaussian=None, + prior_spline_derval_gaussian_domain=(0.0, 1.0), + prior_spline_derval_uniform=None, + prior_spline_derval_uniform_domain=(0.0, 1.0), + prior_spline_der2val_gaussian=None, + prior_spline_der2val_gaussian_domain=(0.0, 1.0), + prior_spline_der2val_uniform=None, + prior_spline_der2val_uniform_domain=(0.0, 1.0), + 
prior_spline_funval_gaussian=None, + prior_spline_funval_gaussian_domain=(0.0, 1.0), + prior_spline_funval_uniform=None, + prior_spline_funval_uniform_domain=(0.0, 1.0), + prior_spline_monotonicity=None, + prior_spline_monotonicity_domain=(0.0, 1.0), + prior_spline_convexity=None, + prior_spline_convexity_domain=(0.0, 1.0), + prior_spline_num_constraint_points=20, + prior_spline_maxder_gaussian=None, + prior_spline_maxder_uniform=None, + prior_spline_normalization=None, + prior_beta_gaussian=None, + prior_beta_uniform=None, + prior_beta_laplace=None, + prior_gamma_gaussian=None, + prior_gamma_uniform=None, + prior_gamma_laplace=None, + ): """Constructor of the covariate model. Args: @@ -170,25 +172,43 @@ def __init__(self, self.spline_r_linear = spline_r_linear self.prior_spline_derval_gaussian = prior_spline_derval_gaussian - self.prior_spline_derval_gaussian_domain_template = np.array(prior_spline_derval_gaussian_domain) + self.prior_spline_derval_gaussian_domain_template = np.array( + prior_spline_derval_gaussian_domain + ) self.prior_spline_derval_uniform = prior_spline_derval_uniform - self.prior_spline_derval_uniform_domain_template = np.array(prior_spline_derval_uniform_domain) + self.prior_spline_derval_uniform_domain_template = np.array( + prior_spline_derval_uniform_domain + ) self.prior_spline_der2val_gaussian = prior_spline_der2val_gaussian - self.prior_spline_der2val_gaussian_domain_template = np.array(prior_spline_der2val_gaussian_domain) + self.prior_spline_der2val_gaussian_domain_template = np.array( + prior_spline_der2val_gaussian_domain + ) self.prior_spline_der2val_uniform = prior_spline_der2val_uniform - self.prior_spline_der2val_uniform_domain_template = np.array(prior_spline_der2val_uniform_domain) + self.prior_spline_der2val_uniform_domain_template = np.array( + prior_spline_der2val_uniform_domain + ) self.prior_spline_funval_gaussian = prior_spline_funval_gaussian - self.prior_spline_funval_gaussian_domain_template = 
np.array(prior_spline_funval_gaussian_domain) + self.prior_spline_funval_gaussian_domain_template = np.array( + prior_spline_funval_gaussian_domain + ) self.prior_spline_funval_uniform = prior_spline_funval_uniform - self.prior_spline_funval_uniform_domain_template = np.array(prior_spline_funval_uniform_domain) + self.prior_spline_funval_uniform_domain_template = np.array( + prior_spline_funval_uniform_domain + ) self.prior_spline_monotonicity = prior_spline_monotonicity self.prior_spline_monotonicity_domain = None - self.prior_spline_monotonicity_domain_template = np.array(prior_spline_monotonicity_domain) + self.prior_spline_monotonicity_domain_template = np.array( + prior_spline_monotonicity_domain + ) self.prior_spline_convexity = prior_spline_convexity self.prior_spline_convexity_domain = None - self.prior_spline_convexity_domain_template = np.array(prior_spline_convexity_domain) - self.prior_spline_num_constraint_points = prior_spline_num_constraint_points + self.prior_spline_convexity_domain_template = np.array( + prior_spline_convexity_domain + ) + self.prior_spline_num_constraint_points = ( + prior_spline_num_constraint_points + ) self.prior_spline_maxder_gaussian = prior_spline_maxder_gaussian self.prior_spline_maxder_uniform = prior_spline_maxder_uniform self.prior_spline_normalization = prior_spline_normalization @@ -226,8 +246,7 @@ def __init__(self, self._process_priors() def _check_inputs(self): - """Check the attributes. 
- """ + """Check the attributes.""" assert utils.is_cols(self.alt_cov) assert utils.is_cols(self.ref_cov) assert isinstance(self.name, str) or self.name is None @@ -240,7 +259,7 @@ def _check_inputs(self): # spline specific assert self.spline is None or isinstance(self.spline, xspline.XSpline) - assert self.spline_knots_type in ['frequency', 'domain'] + assert self.spline_knots_type in ["frequency", "domain"] assert isinstance(self.spline_knots_template, np.ndarray) assert np.min(self.spline_knots_template) >= 0.0 assert np.max(self.spline_knots_template) <= 1.0 @@ -257,20 +276,40 @@ def _check_inputs(self): assert len(self.prior_spline_der2val_gaussian_domain_template) == 2 assert len(self.prior_spline_funval_gaussian_domain_template) == 2 - assert (np.diff(self.prior_spline_monotonicity_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_convexity_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_derval_gaussian_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_derval_uniform_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_der2val_gaussian_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_der2val_uniform_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_funval_gaussian_domain_template) >= 0.0).all() - assert (np.diff(self.prior_spline_funval_uniform_domain_template) >= 0.0).all() + assert ( + np.diff(self.prior_spline_monotonicity_domain_template) >= 0.0 + ).all() + assert ( + np.diff(self.prior_spline_convexity_domain_template) >= 0.0 + ).all() + assert ( + np.diff(self.prior_spline_derval_gaussian_domain_template) >= 0.0 + ).all() + assert ( + np.diff(self.prior_spline_derval_uniform_domain_template) >= 0.0 + ).all() + assert ( + np.diff(self.prior_spline_der2val_gaussian_domain_template) >= 0.0 + ).all() + assert ( + np.diff(self.prior_spline_der2val_uniform_domain_template) >= 0.0 + ).all() + assert ( + 
np.diff(self.prior_spline_funval_gaussian_domain_template) >= 0.0 + ).all() + assert ( + np.diff(self.prior_spline_funval_uniform_domain_template) >= 0.0 + ).all() # priors - assert (self.prior_spline_monotonicity in ['increasing', 'decreasing'] or - self.prior_spline_monotonicity is None) - assert (self.prior_spline_convexity in ['convex', 'concave'] or - self.prior_spline_convexity is None) + assert ( + self.prior_spline_monotonicity in ["increasing", "decreasing"] + or self.prior_spline_monotonicity is None + ) + assert ( + self.prior_spline_convexity in ["convex", "concave"] + or self.prior_spline_convexity is None + ) assert isinstance(self.prior_spline_num_constraint_points, int) assert self.prior_spline_num_constraint_points > 0 assert utils.is_gaussian_prior(self.prior_spline_derval_gaussian) @@ -280,13 +319,19 @@ def _check_inputs(self): assert utils.is_gaussian_prior(self.prior_beta_gaussian) assert utils.is_gaussian_prior(self.prior_gamma_gaussian) - assert self.prior_spline_normalization is None or len(self.prior_spline_normalization) == 2 or \ - len(self.prior_spline_normalization) == 3 + assert ( + self.prior_spline_normalization is None + or len(self.prior_spline_normalization) == 2 + or len(self.prior_spline_normalization) == 3 + ) if self.prior_spline_normalization is not None: assert (self.prior_spline_normalization[-1] >= 0.0).all() assert sum(self.prior_spline_normalization[-1]) > 0.0 if len(self.prior_spline_normalization) == 3: - assert (self.prior_spline_normalization[0] <= self.prior_spline_normalization[1]).all() + assert ( + self.prior_spline_normalization[0] + <= self.prior_spline_normalization[1] + ).all() assert utils.is_uniform_prior(self.prior_spline_derval_uniform) assert utils.is_uniform_prior(self.prior_spline_der2val_uniform) @@ -298,8 +343,7 @@ def _check_inputs(self): assert utils.is_laplace_prior(self.prior_gamma_laplace) def _process_inputs(self): - """Process attributes. 
- """ + """Process attributes.""" # covariates names if not isinstance(self.alt_cov, list): self.alt_cov = [self.alt_cov] @@ -310,40 +354,49 @@ def _process_inputs(self): if len(self.alt_cov) == 1: self.name = self.alt_cov[0] else: - self.name = 'cov' + '{:0>3}'.format(np.random.randint(1000)) + self.name = "cov" + "{:0>3}".format(np.random.randint(1000)) # spline knots - self.spline_knots_template = np.hstack([self.spline_knots_template, [0.0, 1.0]]) + self.spline_knots_template = np.hstack( + [self.spline_knots_template, [0.0, 1.0]] + ) self.spline_knots_template = np.unique(self.spline_knots_template) def _process_priors(self): - """Process priors. - """ + """Process priors.""" # prior information if self.use_spline: self.prior_spline_maxder_gaussian = utils.input_gaussian_prior( - self.prior_spline_maxder_gaussian, self.spline_knots_template.size - 1 + self.prior_spline_maxder_gaussian, + self.spline_knots_template.size - 1, ) self.prior_spline_maxder_uniform = utils.input_uniform_prior( - self.prior_spline_maxder_uniform, self.spline_knots_template.size - 1 + self.prior_spline_maxder_uniform, + self.spline_knots_template.size - 1, ) self.prior_spline_derval_gaussian = utils.input_gaussian_prior( - self.prior_spline_derval_gaussian, self.prior_spline_num_constraint_points + self.prior_spline_derval_gaussian, + self.prior_spline_num_constraint_points, ) self.prior_spline_derval_uniform = utils.input_uniform_prior( - self.prior_spline_derval_uniform, self.prior_spline_num_constraint_points + self.prior_spline_derval_uniform, + self.prior_spline_num_constraint_points, ) self.prior_spline_der2val_gaussian = utils.input_gaussian_prior( - self.prior_spline_der2val_gaussian, self.prior_spline_num_constraint_points + self.prior_spline_der2val_gaussian, + self.prior_spline_num_constraint_points, ) self.prior_spline_der2val_uniform = utils.input_uniform_prior( - self.prior_spline_der2val_uniform, self.prior_spline_num_constraint_points + 
self.prior_spline_der2val_uniform, + self.prior_spline_num_constraint_points, ) self.prior_spline_funval_gaussian = utils.input_gaussian_prior( - self.prior_spline_funval_gaussian, self.prior_spline_num_constraint_points + self.prior_spline_funval_gaussian, + self.prior_spline_num_constraint_points, ) self.prior_spline_funval_uniform = utils.input_uniform_prior( - self.prior_spline_funval_uniform, self.prior_spline_num_constraint_points + self.prior_spline_funval_uniform, + self.prior_spline_num_constraint_points, ) else: self.prior_spline_maxder_gaussian = None @@ -370,21 +423,23 @@ def _process_priors(self): ) def attach_data(self, data: MRData): - """Attach data. - """ + """Attach data.""" if self.use_spline: - self.spline = self.create_spline(data, spline_knots=self.spline_knots) + self.spline = self.create_spline( + data, spline_knots=self.spline_knots + ) self.spline_knots = self.spline.knots def has_data(self): - """Return ``True`` if there is one data object attached. - """ + """Return ``True`` if there is one data object attached.""" if self.use_spline: return self.spline is not None else: return True - def create_spline(self, data: MRData, spline_knots: np.ndarray = None) -> xspline.XSpline: + def create_spline( + self, data: MRData, spline_knots: np.ndarray = None + ) -> xspline.XSpline: """Create spline given current spline parameters. 
Args: data (mrtool.MRData): @@ -407,33 +462,61 @@ def create_spline(self, data: MRData, spline_knots: np.ndarray = None) -> xsplin cov = np.unique(cov) if spline_knots is None: - if self.spline_knots_type == 'frequency': + if self.spline_knots_type == "frequency": spline_knots = np.quantile(cov, self.spline_knots_template) else: - spline_knots = cov.min() + self.spline_knots_template*(cov.max() - cov.min()) - - self.prior_spline_monotonicity_domain = spline_knots[0] + \ - self.prior_spline_monotonicity_domain_template*(spline_knots[-1] - spline_knots[0]) - self.prior_spline_convexity_domain = spline_knots[0] + \ - self.prior_spline_convexity_domain_template*(spline_knots[-1] - spline_knots[0]) - - self.prior_spline_derval_gaussian_domain = spline_knots[0] + \ - self.prior_spline_derval_gaussian_domain_template*(spline_knots[-1] - spline_knots[0]) - self.prior_spline_derval_uniform_domain = spline_knots[0] + \ - self.prior_spline_derval_uniform_domain_template*(spline_knots[-1] - spline_knots[0]) - self.prior_spline_der2val_gaussian_domain = spline_knots[0] + \ - self.prior_spline_der2val_gaussian_domain_template*(spline_knots[-1] - spline_knots[0]) - self.prior_spline_der2val_uniform_domain = spline_knots[0] + \ - self.prior_spline_der2val_uniform_domain_template*(spline_knots[-1] - spline_knots[0]) - self.prior_spline_funval_gaussian_domain = spline_knots[0] + \ - self.prior_spline_funval_gaussian_domain_template*(spline_knots[-1] - spline_knots[0]) - self.prior_spline_funval_uniform_domain = spline_knots[0] + \ - self.prior_spline_funval_uniform_domain_template*(spline_knots[-1] - spline_knots[0]) - - spline = xspline.XSpline(spline_knots, - self.spline_degree, - l_linear=self.spline_l_linear, - r_linear=self.spline_r_linear) + spline_knots = cov.min() + self.spline_knots_template * ( + cov.max() - cov.min() + ) + + self.prior_spline_monotonicity_domain = spline_knots[ + 0 + ] + self.prior_spline_monotonicity_domain_template * ( + spline_knots[-1] - 
spline_knots[0] + ) + self.prior_spline_convexity_domain = spline_knots[ + 0 + ] + self.prior_spline_convexity_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + + self.prior_spline_derval_gaussian_domain = spline_knots[ + 0 + ] + self.prior_spline_derval_gaussian_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + self.prior_spline_derval_uniform_domain = spline_knots[ + 0 + ] + self.prior_spline_derval_uniform_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + self.prior_spline_der2val_gaussian_domain = spline_knots[ + 0 + ] + self.prior_spline_der2val_gaussian_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + self.prior_spline_der2val_uniform_domain = spline_knots[ + 0 + ] + self.prior_spline_der2val_uniform_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + self.prior_spline_funval_gaussian_domain = spline_knots[ + 0 + ] + self.prior_spline_funval_gaussian_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + self.prior_spline_funval_uniform_domain = spline_knots[ + 0 + ] + self.prior_spline_funval_uniform_domain_template * ( + spline_knots[-1] - spline_knots[0] + ) + + spline = xspline.XSpline( + spline_knots, + self.spline_degree, + l_linear=self.spline_l_linear, + r_linear=self.spline_r_linear, + ) return spline @@ -449,18 +532,28 @@ def create_design_mat(self, data): alt_cov = data.get_covs(self.alt_cov) ref_cov = data.get_covs(self.ref_cov) - alt_mat = utils.avg_integral(alt_cov, spline=self.spline, - use_spline_intercept=self.use_spline_intercept) - ref_mat = utils.avg_integral(ref_cov, spline=self.spline, - use_spline_intercept=self.use_spline_intercept) + alt_mat = utils.avg_integral( + alt_cov, + spline=self.spline, + use_spline_intercept=self.use_spline_intercept, + ) + ref_mat = utils.avg_integral( + ref_cov, + spline=self.spline, + use_spline_intercept=self.use_spline_intercept, + ) return alt_mat, ref_mat def create_x_fun(self, data): - raise NotImplementedError("Cannot use 
create_x_fun directly in CovModel class.") + raise NotImplementedError( + "Cannot use create_x_fun directly in CovModel class." + ) def create_z_mat(self, data): - raise NotImplementedError("Cannot use create_z_mat directly in CovModel class.") + raise NotImplementedError( + "Cannot use create_z_mat directly in CovModel class." + ) def create_constraint_mat(self) -> Tuple[np.ndarray, np.ndarray]: """Create constraint matrix. @@ -474,58 +567,104 @@ def create_constraint_mat(self) -> Tuple[np.ndarray, np.ndarray]: if not self.use_spline: return c_mat, c_val - derval_points = np.linspace(*self.prior_spline_derval_uniform_domain, - self.prior_spline_num_constraint_points) - der2val_points = np.linspace(*self.prior_spline_der2val_uniform_domain, - self.prior_spline_num_constraint_points) - funval_points = np.linspace(*self.prior_spline_funval_uniform_domain, - self.prior_spline_num_constraint_points) - mono_points = np.linspace(*self.prior_spline_monotonicity_domain, - self.prior_spline_num_constraint_points) - cvcv_points = np.linspace(*self.prior_spline_convexity_domain, - self.prior_spline_num_constraint_points) + derval_points = np.linspace( + *self.prior_spline_derval_uniform_domain, + self.prior_spline_num_constraint_points, + ) + der2val_points = np.linspace( + *self.prior_spline_der2val_uniform_domain, + self.prior_spline_num_constraint_points, + ) + funval_points = np.linspace( + *self.prior_spline_funval_uniform_domain, + self.prior_spline_num_constraint_points, + ) + mono_points = np.linspace( + *self.prior_spline_monotonicity_domain, + self.prior_spline_num_constraint_points, + ) + cvcv_points = np.linspace( + *self.prior_spline_convexity_domain, + self.prior_spline_num_constraint_points, + ) tmp_val = np.array([[-np.inf], [0.0]]) index = 0 if self.use_spline_intercept else 1 # spline derval uniform priors - if not np.isinf(self.prior_spline_derval_uniform).all() and self.use_spline: - c_mat = np.vstack((c_mat, self.spline.design_dmat(derval_points, 1)[:, 
index:])) + if ( + not np.isinf(self.prior_spline_derval_uniform).all() + and self.use_spline + ): + c_mat = np.vstack( + (c_mat, self.spline.design_dmat(derval_points, 1)[:, index:]) + ) c_val = np.hstack((c_val, self.prior_spline_derval_uniform)) # spline der2val uniform priors - if not np.isinf(self.prior_spline_der2val_uniform).all() and self.use_spline: - c_mat = np.vstack((c_mat, self.spline.design_dmat(der2val_points, 2)[:, index:])) + if ( + not np.isinf(self.prior_spline_der2val_uniform).all() + and self.use_spline + ): + c_mat = np.vstack( + (c_mat, self.spline.design_dmat(der2val_points, 2)[:, index:]) + ) c_val = np.hstack((c_val, self.prior_spline_der2val_uniform)) # spline funval uniform priors - if not np.isinf(self.prior_spline_funval_uniform).all() and self.use_spline: - c_mat = np.vstack((c_mat, self.spline.design_mat(funval_points)[:, index:])) + if ( + not np.isinf(self.prior_spline_funval_uniform).all() + and self.use_spline + ): + c_mat = np.vstack( + (c_mat, self.spline.design_mat(funval_points)[:, index:]) + ) c_val = np.hstack((c_val, self.prior_spline_funval_uniform)) # spline monotonicity constraints if self.prior_spline_monotonicity is not None and self.use_spline: - sign = 1.0 if self.prior_spline_monotonicity == 'decreasing' else -1.0 - c_mat = np.vstack((c_mat, sign*self.spline.design_dmat(mono_points, 1)[:, index:])) - c_val = np.hstack((c_val, np.repeat(tmp_val, mono_points.size, axis=1))) + sign = ( + 1.0 if self.prior_spline_monotonicity == "decreasing" else -1.0 + ) + c_mat = np.vstack( + ( + c_mat, + sign * self.spline.design_dmat(mono_points, 1)[:, index:], + ) + ) + c_val = np.hstack( + (c_val, np.repeat(tmp_val, mono_points.size, axis=1)) + ) # spline convexity constraints if self.prior_spline_convexity is not None and self.use_spline: - sign = 1.0 if self.prior_spline_convexity == 'concave' else -1.0 - c_mat = np.vstack((c_mat, sign*self.spline.design_dmat(cvcv_points, 2)[:, index:])) - c_val = np.hstack((c_val, 
np.repeat(tmp_val, cvcv_points.size, axis=1))) + sign = 1.0 if self.prior_spline_convexity == "concave" else -1.0 + c_mat = np.vstack( + ( + c_mat, + sign * self.spline.design_dmat(cvcv_points, 2)[:, index:], + ) + ) + c_val = np.hstack( + (c_val, np.repeat(tmp_val, cvcv_points.size, axis=1)) + ) # spline maximum derivative constraints - if not np.isinf(self.prior_spline_maxder_uniform).all() and self.use_spline: + if ( + not np.isinf(self.prior_spline_maxder_uniform).all() + and self.use_spline + ): c_mat = np.vstack((c_mat, self.spline.last_dmat()[:, index:])) c_val = np.hstack((c_val, self.prior_spline_maxder_uniform)) # spline normalization prior if self.prior_spline_normalization is not None and self.use_spline: - mat = utils.avg_integral(self.prior_spline_normalization[:-1].T, - spline=self.spline, - use_spline_intercept=self.use_spline_intercept) + mat = utils.avg_integral( + self.prior_spline_normalization[:-1].T, + spline=self.spline, + use_spline_intercept=self.use_spline_intercept, + ) weights = self.prior_spline_normalization[-1] - weights = weights/weights.sum() + weights = weights / weights.sum() c_mat = np.vstack((c_mat, mat.T.dot(weights))) c_val = np.hstack((c_val, np.ones((2, 1)))) @@ -542,31 +681,55 @@ def create_regularization_mat(self) -> Tuple[np.ndarray, np.ndarray]: if not self.use_spline: return r_mat, r_val - derval_points = np.linspace(*self.prior_spline_derval_gaussian_domain, - self.prior_spline_num_constraint_points) - der2val_points = np.linspace(*self.prior_spline_der2val_uniform_domain, - self.prior_spline_num_constraint_points) - funval_points = np.linspace(*self.prior_spline_funval_gaussian_domain, - self.prior_spline_num_constraint_points) + derval_points = np.linspace( + *self.prior_spline_derval_gaussian_domain, + self.prior_spline_num_constraint_points, + ) + der2val_points = np.linspace( + *self.prior_spline_der2val_uniform_domain, + self.prior_spline_num_constraint_points, + ) + funval_points = np.linspace( + 
*self.prior_spline_funval_gaussian_domain, + self.prior_spline_num_constraint_points, + ) index = 0 if self.use_spline_intercept else 1 # spline derval gaussian priors - if not np.isinf(self.prior_spline_derval_gaussian[1]).all() and self.use_spline: - r_mat = np.vstack((r_mat, self.spline.design_dmat(derval_points, 1)[:, index:])) + if ( + not np.isinf(self.prior_spline_derval_gaussian[1]).all() + and self.use_spline + ): + r_mat = np.vstack( + (r_mat, self.spline.design_dmat(derval_points, 1)[:, index:]) + ) r_val = np.hstack((r_val, self.prior_spline_derval_gaussian)) # spline der2val gaussian priors - if not np.isinf(self.prior_spline_der2val_gaussian[1]).all() and self.use_spline: - r_mat = np.vstack((r_mat, self.spline.design_dmat(der2val_points, 2)[:, index:])) + if ( + not np.isinf(self.prior_spline_der2val_gaussian[1]).all() + and self.use_spline + ): + r_mat = np.vstack( + (r_mat, self.spline.design_dmat(der2val_points, 2)[:, index:]) + ) r_val = np.hstack((r_val, self.prior_spline_der2val_gaussian)) # spline funval gaussian priors - if not np.isinf(self.prior_spline_funval_gaussian[1]).all() and self.use_spline: - r_mat = np.vstack((r_mat, self.spline.design_mat(funval_points)[:, index:])) + if ( + not np.isinf(self.prior_spline_funval_gaussian[1]).all() + and self.use_spline + ): + r_mat = np.vstack( + (r_mat, self.spline.design_mat(funval_points)[:, index:]) + ) r_val = np.hstack((r_val, self.prior_spline_funval_gaussian)) # spline maximum derivative constraints - if not np.isinf(self.prior_spline_maxder_gaussian[1]).all() and self.use_spline: + if ( + not np.isinf(self.prior_spline_maxder_gaussian[1]).all() + and self.use_spline + ): r_mat = np.vstack((r_mat, self.spline.last_dmat()[:, index:])) r_val = np.hstack((r_val, self.prior_spline_maxder_gaussian)) @@ -575,8 +738,15 @@ def create_regularization_mat(self) -> Tuple[np.ndarray, np.ndarray]: @property def num_x_vars(self): if self.use_spline: - num_interior_knots = len(self.spline_knots_template) 
- (self.spline_l_linear + self.spline_r_linear) - n = num_interior_knots - 1 + self.spline_degree + (self.use_spline_intercept - 1) + num_interior_knots = len(self.spline_knots_template) - ( + self.spline_l_linear + self.spline_r_linear + ) + n = ( + num_interior_knots + - 1 + + self.spline_degree + + (self.use_spline_intercept - 1) + ) else: n = 1 return n @@ -596,11 +766,11 @@ def num_constraints(self): if not self.use_spline: return 0 else: - num_c = self.prior_spline_num_constraint_points*( - (self.prior_spline_monotonicity is not None) + - (self.prior_spline_convexity is not None) + num_c = self.prior_spline_num_constraint_points * ( + (self.prior_spline_monotonicity is not None) + + (self.prior_spline_convexity is not None) ) - num_c += (self.prior_spline_normalization is not None) + num_c += self.prior_spline_normalization is not None if not np.isinf(self.prior_spline_maxder_uniform).all(): num_c += self.prior_spline_maxder_uniform.shape[1] if not np.isinf(self.prior_spline_derval_uniform).all(): @@ -631,15 +801,13 @@ def num_regularizations(self): class LinearCovModel(CovModel): - """Linear Covariates Model. - """ + """Linear Covariates Model.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def create_x_fun(self, data: MRData): - """Create design function for the fixed effects. - """ + """Create design function for the fixed effects.""" alt_mat, ref_mat = self.create_design_mat(data) return utils.mat_to_fun(alt_mat, ref_mat=ref_mat) @@ -672,8 +840,7 @@ def create_z_mat(self, data): class LogCovModel(CovModel): - """Log Covariates Model. 
- """ + """Log Covariates Model.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -727,10 +894,14 @@ def create_constraint_mat(self, threshold=1e-6): index = 0 if self.use_spline_intercept else 1 tmp_val = np.array([[-shift + threshold], [np.inf]]) if self.use_spline: - points = np.linspace(self.spline.knots[0], - self.spline.knots[-1], - self.prior_spline_num_constraint_points) - c_mat = np.vstack((c_mat, self.spline.design_mat(points)[:, index:])) + points = np.linspace( + self.spline.knots[0], + self.spline.knots[-1], + self.prior_spline_num_constraint_points, + ) + c_mat = np.vstack( + (c_mat, self.spline.design_mat(points)[:, index:]) + ) c_val = np.hstack((c_val, np.repeat(tmp_val, points.size, axis=1))) return c_mat, c_val diff --git a/src/mrtool/core/data.py b/src/mrtool/core/data.py index e4464c7..f7c281a 100644 --- a/src/mrtool/core/data.py +++ b/src/mrtool/core/data.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- """ - data - ~~~~ +data +~~~~ - `data` module for `mrtool` package. +`data` module for `mrtool` package. """ + from typing import Dict, List, Union, Any import warnings from dataclasses import dataclass, field @@ -15,8 +16,8 @@ @dataclass class MRData: - """Data for simple linear mixed effects model. 
- """ + """Data for simple linear mixed effects model.""" + obs: np.ndarray = field(default_factory=empty_array) obs_se: np.ndarray = field(default_factory=empty_array) covs: Dict[str, np.ndarray] = field(default_factory=dict) @@ -27,14 +28,27 @@ class MRData: def __post_init__(self): self._check_attr_type() - self.obs = expand_array(self.obs, (self.num_points,), np.nan, 'obs') - self.obs_se = expand_array(self.obs_se, (self.num_points,), 1.0, 'obs_se') - self.study_id = expand_array(self.study_id, (self.num_points,), 'Unknown', 'study_id') - self.data_id = expand_array(self.data_id, (self.num_points,), np.arange(self.num_points), 'data_id') - assert len(np.unique(self.data_id)) == self.num_points, "data_id must be unique for each data point." - self.covs.update({'intercept': np.ones(self.num_points)}) + self.obs = expand_array(self.obs, (self.num_points,), np.nan, "obs") + self.obs_se = expand_array( + self.obs_se, (self.num_points,), 1.0, "obs_se" + ) + self.study_id = expand_array( + self.study_id, (self.num_points,), "Unknown", "study_id" + ) + self.data_id = expand_array( + self.data_id, + (self.num_points,), + np.arange(self.num_points), + "data_id", + ) + assert ( + len(np.unique(self.data_id)) == self.num_points + ), "data_id must be unique for each data point." + self.covs.update({"intercept": np.ones(self.num_points)}) for cov_name, cov in self.covs.items(): - assert len(cov) == self.num_points, f"covs[{cov_name}], inconsistent shape." + assert ( + len(cov) == self.num_points + ), f"covs[{cov_name}], inconsistent shape." self._remove_nan_in_covs() self._get_study_structure() @@ -42,32 +56,29 @@ def __post_init__(self): @property def num_points(self): - """Number of data points. 
- """ - return max([len(self.obs), len(self.obs_se), len(self.study_id)] + - [len(cov) for cov in self.covs.values()]) + """Number of data points.""" + return max( + [len(self.obs), len(self.obs_se), len(self.study_id)] + + [len(cov) for cov in self.covs.values()] + ) @property def num_obs(self): - """Number of observations. - """ + """Number of observations.""" return len(self.obs) @property def num_covs(self): - """Number of covariates. - """ + """Number of covariates.""" return len(self.covs) @property def num_studies(self): - """Number of studies. - """ + """Number of studies.""" return len(self.studies) def _check_attr_type(self): - """Check the type of the attributes. - """ + """Check the type of the attributes.""" assert isinstance(self.obs, np.ndarray) assert is_numeric_array(self.obs) assert isinstance(self.obs_se, np.ndarray) @@ -80,22 +91,32 @@ def _check_attr_type(self): assert is_numeric_array(cov) def _get_cov_scales(self): - """Compute the covariate scale. - """ + """Compute the covariate scale.""" if self.is_empty(): - self.cov_scales = {cov_name: np.nan for cov_name in self.covs.keys()} + self.cov_scales = { + cov_name: np.nan for cov_name in self.covs.keys() + } else: - self.cov_scales = {cov_name: np.max(np.abs(cov)) for cov_name, cov in self.covs.items()} - zero_covs = [cov_name for cov_name, cov_scale in self.cov_scales.items() if cov_scale == 0.0] + self.cov_scales = { + cov_name: np.max(np.abs(cov)) + for cov_name, cov in self.covs.items() + } + zero_covs = [ + cov_name + for cov_name, cov_scale in self.cov_scales.items() + if cov_scale == 0.0 + ] for cov_name in zero_covs: - warnings.warn(f"numbers in covariate[{cov_name}] are all zero. " - f"Please use this in spline range exposure or when preidct.") + warnings.warn( + f"numbers in covariate[{cov_name}] are all zero. " + f"Please use this in spline range exposure or when preidct." + ) def _get_study_structure(self): - """Get the study structure. 
- """ - self.studies, self.study_sizes = np.unique(self.study_id, - return_counts=True) + """Get the study structure.""" + self.studies, self.study_sizes = np.unique( + self.study_id, return_counts=True + ) self._sort_by_study_id() def _sort_data(self, index: np.ndarray): @@ -105,7 +126,9 @@ def _sort_data(self, index: np.ndarray): index (np.ndarray): Sorting index. """ index = np.array(index) - assert (np.sort(index) == np.arange(self.num_obs)).all(), "Sorting index must go from 0 to num_obs - 1." + assert ( + np.sort(index) == np.arange(self.num_obs) + ).all(), "Sorting index must go from 0 to num_obs - 1." self.obs = self.obs[index] self.obs_se = self.obs_se[index] for cov_name, cov in self.covs.items(): @@ -114,28 +137,27 @@ def _sort_data(self, index: np.ndarray): self.data_id = self.data_id[index] def _sort_by_study_id(self): - """Sort data by study_id. - """ + """Sort data by study_id.""" if not self.is_empty() and self.num_studies != 1: sort_index = np.argsort(self.study_id) self._sort_data(sort_index) def _sort_by_data_id(self): - """Sort data by data_id. - """ + """Sort data by data_id.""" if not self.is_empty(): sort_index = np.argsort(self.data_id) self._sort_data(sort_index) def _remove_nan_in_covs(self): - """Remove potential nans in covaraites. - """ + """Remove potential nans in covaraites.""" if not self.is_empty(): index = np.full(self.num_obs, False) for cov_name, cov in self.covs.items(): cov_index = np.isnan(cov) if cov_index.any(): - warnings.warn(f"There are {cov_index.sum()} nans in covaraite {cov_name}.") + warnings.warn( + f"There are {cov_index.sum()} nans in covaraite {cov_name}." + ) index = index | cov_index self._remove_data(index) @@ -156,7 +178,7 @@ def _remove_data(self, index: np.ndarray): self.study_id = self.study_id[keep_index] self.data_id = self.data_id[keep_index] - def _get_data(self, index: np.ndarray) -> 'MRData': + def _get_data(self, index: np.ndarray) -> "MRData": """Get the data point by index. 
Args: @@ -176,19 +198,18 @@ def _get_data(self, index: np.ndarray) -> 'MRData': return MRData(obs, obs_se, covs, study_id, data_id) def is_empty(self) -> bool: - """Return true when object contain data. - """ + """Return true when object contain data.""" return self.num_points == 0 def _assert_not_empty(self): - """Raise ValueError when object is empty. - """ + """Raise ValueError when object is empty.""" if self.is_empty(): raise ValueError("MRData object is empty.") - def is_cov_normalized(self, covs: Union[List[str], str, None] = None) -> bool: - """Return true when covariates are normalized. - """ + def is_cov_normalized( + self, covs: Union[List[str], str, None] = None + ) -> bool: + """Return true when covariates are normalized.""" if covs is None: covs = list(self.covs.keys()) else: @@ -196,68 +217,96 @@ def is_cov_normalized(self, covs: Union[List[str], str, None] = None) -> bool: assert self.has_covs(covs) ok = not self.is_empty() for cov_name in covs: - ok = ok and ((not is_numeric_array(self.covs[cov_name])) or - (np.max(np.abs(self.covs[cov_name])) == 1.0)) + ok = ok and ( + (not is_numeric_array(self.covs[cov_name])) + or (np.max(np.abs(self.covs[cov_name])) == 1.0) + ) return ok def reset(self): - """Reset all the attributes to default values. - """ + """Reset all the attributes to default values.""" self.obs = empty_array() self.obs_se = empty_array() self.covs = dict() - self.covs['intercept'] = np.ones(0) + self.covs["intercept"] = np.ones(0) self.study_id = empty_array() self.data_id = empty_array() - def load_df(self, data: pd.DataFrame, - col_obs: Union[str, None] = None, - col_obs_se: Union[str, None] = None, - col_covs: Union[List[str], None] = None, - col_study_id: Union[str, None] = None, - col_data_id: Union[str, None] = None): - """Load data from data frame. 
- """ + def load_df( + self, + data: pd.DataFrame, + col_obs: Union[str, None] = None, + col_obs_se: Union[str, None] = None, + col_covs: Union[List[str], None] = None, + col_study_id: Union[str, None] = None, + col_data_id: Union[str, None] = None, + ): + """Load data from data frame.""" self.reset() - self.obs = empty_array() if col_obs is None else data[col_obs].to_numpy() - self.obs_se = empty_array() if col_obs_se is None else data[col_obs_se].to_numpy() - self.study_id = empty_array() if col_study_id is None else data[col_study_id].to_numpy() - self.data_id = empty_array() if col_data_id is None else data[col_data_id].to_numpy() - self.covs = dict() if col_covs is None else {cov_name: data[cov_name].to_numpy() - for cov_name in col_covs} + self.obs = ( + empty_array() if col_obs is None else data[col_obs].to_numpy() + ) + self.obs_se = ( + empty_array() if col_obs_se is None else data[col_obs_se].to_numpy() + ) + self.study_id = ( + empty_array() + if col_study_id is None + else data[col_study_id].to_numpy() + ) + self.data_id = ( + empty_array() + if col_data_id is None + else data[col_data_id].to_numpy() + ) + self.covs = ( + dict() + if col_covs is None + else {cov_name: data[cov_name].to_numpy() for cov_name in col_covs} + ) self.__post_init__() - def load_xr(self, data, - var_obs: Union[str, None] = None, - var_obs_se: Union[str, None] = None, - var_covs: Union[List[str], None] = None, - coord_study_id: Union[str, None] = None): - """Load data from xarray. 
- """ + def load_xr( + self, + data, + var_obs: Union[str, None] = None, + var_obs_se: Union[str, None] = None, + var_covs: Union[List[str], None] = None, + coord_study_id: Union[str, None] = None, + ): + """Load data from xarray.""" self.reset() - self.obs = empty_array() if var_obs is None else data[var_obs].data.flatten() - self.obs_se = empty_array() if var_obs_se is None else data[var_obs_se].data.flatten() + self.obs = ( + empty_array() if var_obs is None else data[var_obs].data.flatten() + ) + self.obs_se = ( + empty_array() + if var_obs_se is None + else data[var_obs_se].data.flatten() + ) if coord_study_id is None: self.study_id = empty_array() else: index = data.coords.to_index().to_frame(index=False) self.study_id = index[coord_study_id].to_numpy() - self.covs = dict() if var_covs is None else {cov_name: data[cov_name].data.flatten() - for cov_name in var_covs} + self.covs = ( + dict() + if var_covs is None + else { + cov_name: data[cov_name].data.flatten() for cov_name in var_covs + } + ) self.__post_init__() def to_df(self) -> pd.DataFrame: - """Convert data object to data frame. - """ - df = pd.DataFrame({ - 'obs': self.obs, - 'obs_se': self.obs_se, - 'study_id': self.study_id - }) + """Convert data object to data frame.""" + df = pd.DataFrame( + {"obs": self.obs, "obs_se": self.obs_se, "study_id": self.study_id} + ) for cov_name in self.covs: df[cov_name] = self.covs[cov_name] @@ -296,20 +345,24 @@ def has_studies(self, studies: Union[List[Any], Any]) -> bool: return all([study in self.studies for study in studies]) def _assert_has_covs(self, covs: Union[List[str], str]): - """Assert has covariates otherwise raise ValueError. 
- """ + """Assert has covariates otherwise raise ValueError.""" if not self.has_covs(covs): covs = to_list(covs) missing_covs = [cov for cov in covs if cov not in self.covs] - raise ValueError(f"MRData object do not contain covariates: {missing_covs}.") + raise ValueError( + f"MRData object do not contain covariates: {missing_covs}." + ) def _assert_has_studies(self, studies: Union[List[Any], Any]): - """Assert has studies otherwise raise ValueError. - """ + """Assert has studies otherwise raise ValueError.""" if not self.has_studies(studies): studies = to_list(studies) - missing_studies = [study for study in studies if study not in self.studies] - raise ValueError(f"MRData object do not contain studies: {missing_studies}.") + missing_studies = [ + study for study in studies if study not in self.studies + ] + raise ValueError( + f"MRData object do not contain studies: {missing_studies}." + ) def get_covs(self, covs: Union[List[str], str]) -> np.ndarray: """Get covariate matrix. @@ -326,9 +379,11 @@ def get_covs(self, covs: Union[List[str], str]) -> np.ndarray: if len(covs) == 0: return np.array([]).reshape(self.num_obs, 0) else: - return np.hstack([self.covs[cov_names][:, None] for cov_names in covs]) + return np.hstack( + [self.covs[cov_names][:, None] for cov_names in covs] + ) - def get_study_data(self, studies: Union[List[Any], Any]) -> 'MRData': + def get_study_data(self, studies: Union[List[Any], Any]) -> "MRData": """Get study specific data. Args: @@ -343,8 +398,7 @@ def get_study_data(self, studies: Union[List[Any], Any]) -> 'MRData': return self._get_data(index) def normalize_covs(self, covs: Union[List[str], str, None] = None): - """Normalize covariates by the largest absolute value for each covariate. 
- """ + """Normalize covariates by the largest absolute value for each covariate.""" if covs is None: covs = list(self.covs.keys()) else: @@ -353,11 +407,12 @@ def normalize_covs(self, covs: Union[List[str], str, None] = None): if not self.is_empty(): for cov_name in covs: if is_numeric_array(self.covs[cov_name]): - self.covs[cov_name] = self.covs[cov_name]/self.cov_scales[cov_name] + self.covs[cov_name] = ( + self.covs[cov_name] / self.cov_scales[cov_name] + ) def __repr__(self): - """Summary of the object. - """ + """Summary of the object.""" dimension_summary = [ "number of observations: %i" % self.num_obs, "number of covariates : %i" % self.num_covs, diff --git a/src/mrtool/core/model.py b/src/mrtool/core/model.py index 4c28110..29fdefb 100644 --- a/src/mrtool/core/model.py +++ b/src/mrtool/core/model.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- """ - model - ~~~~~ +model +~~~~~ - Model module for mrtool package. +Model module for mrtool package. """ + from copy import deepcopy from typing import List, Tuple, Union @@ -19,12 +20,11 @@ class MRBRT: - """MR-BRT Object - """ + """MR-BRT Object""" - def __init__(self, data: MRData, - cov_models: List[CovModel], - inlier_pct: float = 1.0): + def __init__( + self, data: MRData, cov_models: List[CovModel], inlier_pct: float = 1.0 + ): """Constructor of MRBRT. 
Args: @@ -37,9 +37,7 @@ def __init__(self, data: MRData, self.cov_models = cov_models self.inlier_pct = inlier_pct self.check_input() - self.cov_model_names = [ - cov_model.name for cov_model in self.cov_models - ] + self.cov_model_names = [cov_model.name for cov_model in self.cov_models] self.num_cov_models = len(self.cov_models) self.cov_names = [] for cov_model in self.cov_models: @@ -53,28 +51,30 @@ def __init__(self, data: MRData, self.cov_models[0]._process_priors() # fixed effects size and index - self.x_vars_sizes = [cov_model.num_x_vars for cov_model in self.cov_models] + self.x_vars_sizes = [ + cov_model.num_x_vars for cov_model in self.cov_models + ] self.x_vars_indices = utils.sizes_to_indices(self.x_vars_sizes) self.num_x_vars = sum(self.x_vars_sizes) # random effects size and index - self.z_vars_sizes = [cov_model.num_z_vars for cov_model in self.cov_models] + self.z_vars_sizes = [ + cov_model.num_z_vars for cov_model in self.cov_models + ] self.z_vars_indices = utils.sizes_to_indices(self.z_vars_sizes) self.num_z_vars = sum(self.z_vars_sizes) self.num_vars = self.num_x_vars + self.num_z_vars # number of constraints - self.num_constraints = sum([ - cov_model.num_constraints - for cov_model in self.cov_models - ]) + self.num_constraints = sum( + [cov_model.num_constraints for cov_model in self.cov_models] + ) # number of regularizations - self.num_regularizations = sum([ - cov_model.num_regularizations - for cov_model in self.cov_models - ]) + self.num_regularizations = sum( + [cov_model.num_regularizations for cov_model in self.cov_models] + ) # place holder for the limetr objective self.lt = None @@ -85,130 +85,139 @@ def __init__(self, data: MRData, self.re_soln = None def attach_data(self, data=None): - """Attach data to cov_model. 
- """ + """Attach data to cov_model.""" data = self.data if data is None else data # attach data to cov_model for cov_model in self.cov_models: cov_model.attach_data(data) def check_input(self): - """Check the input type of the attributes. - """ + """Check the input type of the attributes.""" assert isinstance(self.data, MRData) assert isinstance(self.cov_models, list) - assert all([isinstance(cov_model, CovModel) - for cov_model in self.cov_models]) + assert all( + [isinstance(cov_model, CovModel) for cov_model in self.cov_models] + ) assert (self.inlier_pct >= 0.0) and (self.inlier_pct <= 1.0) def get_cov_model(self, name: str) -> CovModel: - """Choose covariate model with name. - """ + """Choose covariate model with name.""" index = self.get_cov_model_index(name) return self.cov_models[index] def get_cov_model_index(self, name: str) -> int: - """From cov_model name get the index. - """ - matching_index = [index for index, cov_model_name in enumerate(self.cov_model_names) - if cov_model_name == name] + """From cov_model name get the index.""" + matching_index = [ + index + for index, cov_model_name in enumerate(self.cov_model_names) + if cov_model_name == name + ] num_matching_index = len(matching_index) - assert num_matching_index == 1, f"Number of matching index is {num_matching_index}." + assert ( + num_matching_index == 1 + ), f"Number of matching index is {num_matching_index}." return matching_index[0] def create_x_fun(self, data=None): - """Create the fixed effects function, link with limetr. 
- """ + """Create the fixed effects function, link with limetr.""" data = self.data if data is None else data # create design functions design_funs = [ - cov_model.create_x_fun(data) - for cov_model in self.cov_models + cov_model.create_x_fun(data) for cov_model in self.cov_models ] funs, jac_funs = list(zip(*design_funs)) def x_fun(beta, funs=funs): - return sum(fun(beta[self.x_vars_indices[i]]) - for i, fun in enumerate(funs)) + return sum( + fun(beta[self.x_vars_indices[i]]) for i, fun in enumerate(funs) + ) def x_jac_fun(beta, jac_funs=jac_funs): - return np.hstack([jac_fun(beta[self.x_vars_indices[i]]) - for i, jac_fun in enumerate(jac_funs)]) + return np.hstack( + [ + jac_fun(beta[self.x_vars_indices[i]]) + for i, jac_fun in enumerate(jac_funs) + ] + ) return x_fun, x_jac_fun def create_z_mat(self, data=None): - """Create the random effects matrix, link with limetr. - """ + """Create the random effects matrix, link with limetr.""" data = self.data if data is None else data - mat = np.hstack([cov_model.create_z_mat(data) - for cov_model in self.cov_models]) + mat = np.hstack( + [cov_model.create_z_mat(data) for cov_model in self.cov_models] + ) return mat def create_c_mat(self): - """Create the constraints matrices. - """ + """Create the constraints matrices.""" c_mat = np.zeros((0, self.num_vars)) c_vec = np.zeros((2, 0)) for i, cov_model in enumerate(self.cov_models): if cov_model.num_constraints != 0: c_mat_sub = np.zeros((cov_model.num_constraints, self.num_vars)) - c_mat_sub[:, self.x_vars_indices[i]], c_vec_sub = cov_model.create_constraint_mat() + c_mat_sub[:, self.x_vars_indices[i]], c_vec_sub = ( + cov_model.create_constraint_mat() + ) c_mat = np.vstack((c_mat, c_mat_sub)) c_vec = np.hstack((c_vec, c_vec_sub)) return c_mat, c_vec def create_h_mat(self): - """Create the regularizer matrices. 
- """ + """Create the regularizer matrices.""" h_mat = np.zeros((0, self.num_vars)) h_vec = np.zeros((2, 0)) for i, cov_model in enumerate(self.cov_models): if cov_model.num_regularizations != 0: - h_mat_sub = np.zeros((cov_model.num_regularizations, self.num_vars)) - h_mat_sub[:, self.x_vars_indices[i]], h_vec_sub = cov_model.create_regularization_mat() + h_mat_sub = np.zeros( + (cov_model.num_regularizations, self.num_vars) + ) + h_mat_sub[:, self.x_vars_indices[i]], h_vec_sub = ( + cov_model.create_regularization_mat() + ) h_mat = np.vstack((h_mat, h_mat_sub)) h_vec = np.hstack((h_vec, h_vec_sub)) return h_mat, h_vec def create_uprior(self): - """Create direct uniform prior. - """ - uprior = np.array([[-np.inf]*self.num_vars, - [np.inf]*self.num_vars]) + """Create direct uniform prior.""" + uprior = np.array([[-np.inf] * self.num_vars, [np.inf] * self.num_vars]) for i, cov_model in enumerate(self.cov_models): uprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_uniform - uprior[:, self.z_vars_indices[i] + self.num_x_vars] = cov_model.prior_gamma_uniform + uprior[:, self.z_vars_indices[i] + self.num_x_vars] = ( + cov_model.prior_gamma_uniform + ) return uprior def create_gprior(self): - """Create direct gaussian prior. - """ - gprior = np.array([[0]*self.num_vars, - [np.inf]*self.num_vars]) + """Create direct gaussian prior.""" + gprior = np.array([[0] * self.num_vars, [np.inf] * self.num_vars]) for i, cov_model in enumerate(self.cov_models): gprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_gaussian - gprior[:, self.z_vars_indices[i] + self.num_x_vars] = cov_model.prior_gamma_gaussian + gprior[:, self.z_vars_indices[i] + self.num_x_vars] = ( + cov_model.prior_gamma_gaussian + ) return gprior def create_lprior(self): - """Create direct laplace prior. 
- """ - lprior = np.array([[0]*self.num_vars, - [np.inf]*self.num_vars]) + """Create direct laplace prior.""" + lprior = np.array([[0] * self.num_vars, [np.inf] * self.num_vars]) for i, cov_model in enumerate(self.cov_models): lprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_laplace - lprior[:, self.z_vars_indices[i] + self.num_x_vars] = cov_model.prior_gamma_laplace + lprior[:, self.z_vars_indices[i] + self.num_x_vars] = ( + cov_model.prior_gamma_laplace + ) return lprior @@ -246,11 +255,11 @@ def fit_model(self, **fit_options): h_fun, h_fun_jac = utils.mat_to_fun(h_mat) uprior = self.create_uprior() - uprior[:, self.num_x_vars:self.num_vars] *= z_scale**2 + uprior[:, self.num_x_vars : self.num_vars] *= z_scale**2 gprior = self.create_gprior() - gprior[:, self.num_x_vars:self.num_vars] *= z_scale**2 + gprior[:, self.num_x_vars : self.num_vars] *= z_scale**2 lprior = self.create_lprior() - lprior[:, self.num_x_vars:self.num_vars] *= z_scale**2 + lprior[:, self.num_x_vars : self.num_vars] *= z_scale**2 if np.isneginf(uprior[0]).all() and np.isposinf(uprior[1]).all(): uprior = None @@ -260,20 +269,34 @@ def fit_model(self, **fit_options): lprior = None # create limetr object - self.lt = LimeTr(n, k_beta, k_gamma, - y, x_fun, x_fun_jac, z_mat, S=s, - C=c_fun, JC=c_fun_jac, c=c_vec, - H=h_fun, JH=h_fun_jac, h=h_vec, - uprior=uprior, gprior=gprior, lprior=lprior, - inlier_percentage=self.inlier_pct) + self.lt = LimeTr( + n, + k_beta, + k_gamma, + y, + x_fun, + x_fun_jac, + z_mat, + S=s, + C=c_fun, + JC=c_fun_jac, + c=c_vec, + H=h_fun, + JH=h_fun_jac, + h=h_vec, + uprior=uprior, + gprior=gprior, + lprior=lprior, + inlier_percentage=self.inlier_pct, + ) self.lt.fit(**fit_options) self.lt.Z *= z_scale - if hasattr(self.lt, 'gprior'): + if hasattr(self.lt, "gprior"): self.lt.gprior[:, self.lt.idx_gamma] /= z_scale**2 - if hasattr(self.lt, 'uprior'): + if hasattr(self.lt, "uprior"): self.lt.uprior[:, self.lt.idx_gamma] /= z_scale**2 - if hasattr(self.lt, 'lprior'): + 
if hasattr(self.lt, "lprior"): self.lt.lprior[:, self.lt.idx_gamma] /= z_scale**2 self.lt.gamma /= z_scale**2 @@ -282,31 +305,40 @@ def fit_model(self, **fit_options): self.w_soln = self.lt.w.copy() self.u_soln = self.lt.estimate_re() self.fe_soln = { - cov_name: self.beta_soln[self.x_vars_indices[self.get_cov_model_index(cov_name)]] + cov_name: self.beta_soln[ + self.x_vars_indices[self.get_cov_model_index(cov_name)] + ] for cov_name in self.cov_model_names } self.re_soln = { - study: self.u_soln[i] - for i, study in enumerate(self.data.studies) + study: self.u_soln[i] for i, study in enumerate(self.data.studies) } self.re_var_soln = { - cov_name: self.gamma_soln[self.z_vars_indices[self.get_cov_model_index(cov_name)]] + cov_name: self.gamma_soln[ + self.z_vars_indices[self.get_cov_model_index(cov_name)] + ] for cov_name in self.cov_model_names if self.cov_models[self.get_cov_model_index(cov_name)].use_re } def extract_re(self, study_id: NDArray) -> NDArray: - """Extract the random effect for a given dataset. - """ - re = np.vstack([ - self.re_soln[study] if study in self.re_soln else np.zeros(self.num_z_vars) - for study in study_id - ]) + """Extract the random effect for a given dataset.""" + re = np.vstack( + [ + self.re_soln[study] + if study in self.re_soln + else np.zeros(self.num_z_vars) + for study in study_id + ] + ) return re - def predict(self, data: MRData, - predict_for_study: bool = False, - sort_by_data_id: bool = False) -> NDArray: + def predict( + self, + data: MRData, + predict_for_study: bool = False, + sort_by_data_id: bool = False, + ) -> NDArray: """Create new prediction with existing solution. Args: @@ -322,13 +354,15 @@ def predict(self, data: MRData, Returns: NDArray: Predicted outcome array. """ - assert data.has_covs(self.cov_names), "Prediction data do not have covariates used for fitting." + assert data.has_covs( + self.cov_names + ), "Prediction data do not have covariates used for fitting." 
x_fun, _ = self.create_x_fun(data=data) prediction = x_fun(self.beta_soln) if predict_for_study: z_mat = self.create_z_mat(data=data) re = self.extract_re(data.study_id) - prediction += np.sum(z_mat*re, axis=1) + prediction += np.sum(z_mat * re, axis=1) if sort_by_data_id: prediction = prediction[np.argsort(data.data_id)] @@ -346,7 +380,7 @@ def sample_soln(self, sample_size: int = 1) -> Tuple[NDArray, NDArray]: Return beta samples and gamma samples. """ if self.lt is None: - raise ValueError('Please fit the model first.') + raise ValueError("Please fit the model first.") beta_soln_samples = self.lt.sample_beta(size=sample_size) gamma_soln_samples = np.repeat( @@ -355,12 +389,14 @@ def sample_soln(self, sample_size: int = 1) -> Tuple[NDArray, NDArray]: return beta_soln_samples, gamma_soln_samples - def create_draws(self, - data: MRData, - beta_samples: NDArray, - gamma_samples: NDArray, - random_study: bool = True, - sort_by_data_id: bool = False) -> NDArray: + def create_draws( + self, + data: MRData, + beta_samples: NDArray, + gamma_samples: NDArray, + random_study: bool = True, + sort_by_data_id: bool = False, + ) -> NDArray: """Create draws for the given data set. 
Args: @@ -383,14 +419,18 @@ def create_draws(self, x_fun, x_jac_fun = self.create_x_fun(data=data) z_mat = self.create_z_mat(data=data) - y_samples = np.vstack([x_fun(beta_sample) for beta_sample in beta_samples]) + y_samples = np.vstack( + [x_fun(beta_sample) for beta_sample in beta_samples] + ) if random_study: - u_samples = np.random.randn(sample_size, self.num_z_vars)*np.sqrt(gamma_samples) + u_samples = np.random.randn(sample_size, self.num_z_vars) * np.sqrt( + gamma_samples + ) y_samples += u_samples.dot(z_mat.T) else: re = self.extract_re(data.study_id) - y_samples += np.sum(z_mat*re, axis=1) + y_samples += np.sum(z_mat * re, axis=1) if sort_by_data_id: y_samples = y_samples[:, np.argsort(data.data_id)] @@ -398,8 +438,7 @@ def create_draws(self, return y_samples.T def summary(self) -> Tuple[pd.DataFrame, pd.DataFrame]: - """Return the summary data frame. - """ + """Return the summary data frame.""" fe = pd.DataFrame(utils.ravel_dict(self.fe_soln), index=[0]) re_var = pd.DataFrame(utils.ravel_dict(self.re_var_soln), index=[0]) @@ -407,15 +446,16 @@ def summary(self) -> Tuple[pd.DataFrame, pd.DataFrame]: class MRBeRT: - """Ensemble model of MRBRT. 
- """ - - def __init__(self, - data: MRData, - ensemble_cov_model: CovModel, - ensemble_knots: NDArray, - cov_models: Union[List[CovModel], None] = None, - inlier_pct: float = 1.0): + """Ensemble model of MRBRT.""" + + def __init__( + self, + data: MRData, + ensemble_cov_model: CovModel, + ensemble_knots: NDArray, + cov_models: Union[List[CovModel], None] = None, + inlier_pct: float = 1.0, + ): """Constructor of `MRBeRT` Args: @@ -442,11 +482,15 @@ def __init__(self, for knots in self.ensemble_knots: ensemble_cov_model = deepcopy(cov_model_tmp) ensemble_cov_model.spline_knots_template = knots.copy() - self.sub_models.append(MRBRT(data, - cov_models=[*self.cov_models, ensemble_cov_model], - inlier_pct=self.inlier_pct)) + self.sub_models.append( + MRBRT( + data, + cov_models=[*self.cov_models, ensemble_cov_model], + inlier_pct=self.inlier_pct, + ) + ) - self.weights = np.ones(self.num_sub_models)/self.num_sub_models + self.weights = np.ones(self.num_sub_models) / self.num_sub_models # inherent the dimension variable self.num_x_vars = self.sub_models[0].num_x_vars @@ -456,71 +500,81 @@ def __init__(self, self.num_regularizations = self.sub_models[0].num_regularizations self.num_cov_models = self.sub_models[0].num_cov_models - def fit_model(self, - scores_weights=np.array([1.0, 1.0]), - slopes=np.array([2.0, 10.0]), - quantiles=np.array([0.4, 0.4]), - **fit_options): - """Fitting the model through limetr. 
- """ + def fit_model( + self, + scores_weights=np.array([1.0, 1.0]), + slopes=np.array([2.0, 10.0]), + quantiles=np.array([0.4, 0.4]), + **fit_options, + ): + """Fitting the model through limetr.""" for sub_model in self.sub_models: sub_model.fit_model(**fit_options) - self.score_model(scores_weights=scores_weights, - slopes=slopes, - quantiles=quantiles) - - self.w_soln = np.vstack([ - sub_model.w_soln for sub_model in self.sub_models - ]).T.dot(self.weights) + self.score_model( + scores_weights=scores_weights, slopes=slopes, quantiles=quantiles + ) - def score_model(self, - scores_weights=np.array([1.0, 1.0]), - slopes=np.array([2.0, 10.0]), - quantiles=np.array([0.4, 0.4])): - """Score the model by there fitting and variation. - """ + self.w_soln = np.vstack( + [sub_model.w_soln for sub_model in self.sub_models] + ).T.dot(self.weights) + + def score_model( + self, + scores_weights=np.array([1.0, 1.0]), + slopes=np.array([2.0, 10.0]), + quantiles=np.array([0.4, 0.4]), + ): + """Score the model by there fitting and variation.""" scores = np.zeros((2, self.num_sub_models)) for i, sub_model in enumerate(self.sub_models): scores[0][i] = score_sub_models_datafit(sub_model) - scores[1][i] = score_sub_models_variation(sub_model, - self.ensemble_cov_model_name, n=3) + scores[1][i] = score_sub_models_variation( + sub_model, self.ensemble_cov_model_name, n=3 + ) weights = np.zeros(scores.shape) for i in range(2): - weights[i] = utils.nonlinear_trans( - scores[i], - slope=slopes[i], - quantile=quantiles[i] - )**scores_weights[i] + weights[i] = ( + utils.nonlinear_trans( + scores[i], slope=slopes[i], quantile=quantiles[i] + ) + ** scores_weights[i] + ) weights = np.prod(weights, axis=0) - self.weights = weights/np.sum(weights) + self.weights = weights / np.sum(weights) - def sample_soln(self, - sample_size: int = 1) -> Tuple[List[NDArray], List[NDArray]]: - """Sample solution. 
- """ + def sample_soln( + self, sample_size: int = 1 + ) -> Tuple[List[NDArray], List[NDArray]]: + """Sample solution.""" sample_sizes = np.random.multinomial(sample_size, self.weights) beta_samples = [] gamma_samples = [] for i, sub_model in enumerate(self.sub_models): if sample_sizes[i] != 0: - sub_beta_samples, sub_gamma_samples = \ - sub_model.sample_soln(sample_size=sample_sizes[i]) + sub_beta_samples, sub_gamma_samples = sub_model.sample_soln( + sample_size=sample_sizes[i] + ) else: sub_beta_samples = np.array([]).reshape(0, sub_model.num_x_vars) - sub_gamma_samples = np.array([]).reshape(0, sub_model.num_z_vars) + sub_gamma_samples = np.array([]).reshape( + 0, sub_model.num_z_vars + ) beta_samples.append(sub_beta_samples) gamma_samples.append(sub_gamma_samples) return beta_samples, gamma_samples - def predict(self, data: MRData, - predict_for_study: bool = False, - sort_by_data_id: bool = False, - return_avg: bool = True) -> NDArray: + def predict( + self, + data: MRData, + predict_for_study: bool = False, + sort_by_data_id: bool = False, + return_avg: bool = True, + ) -> NDArray: """Create new prediction with existing solution. Args: @@ -528,63 +582,88 @@ def predict(self, data: MRData, When it is `True`, the function will return an average prediction based on the score, and when it is `False` the function will return a list of predictions from all groups. 
""" - prediction = np.vstack([ - sub_model.predict(data, - predict_for_study=predict_for_study, - sort_by_data_id=sort_by_data_id) - for sub_model in self.sub_models - ]) + prediction = np.vstack( + [ + sub_model.predict( + data, + predict_for_study=predict_for_study, + sort_by_data_id=sort_by_data_id, + ) + for sub_model in self.sub_models + ] + ) if return_avg: prediction = prediction.T.dot(self.weights) return prediction - def create_draws(self, - data: MRData, - beta_samples: List[NDArray], - gamma_samples: List[NDArray], - random_study: bool = True, - sort_by_data_id: bool = False) -> NDArray: + def create_draws( + self, + data: MRData, + beta_samples: List[NDArray], + gamma_samples: List[NDArray], + random_study: bool = True, + sort_by_data_id: bool = False, + ) -> NDArray: """Create draws. For function description please check `create_draws` for `MRBRT`. """ - sample_sizes = [sub_beta_samples.shape[0] for sub_beta_samples in beta_samples] + sample_sizes = [ + sub_beta_samples.shape[0] for sub_beta_samples in beta_samples + ] for i in range(self.num_sub_models): - assert beta_samples[i].shape == (sample_sizes[i], self.sub_models[0].num_x_vars) - assert gamma_samples[i].shape == (sample_sizes[i], self.sub_models[0].num_z_vars) + assert beta_samples[i].shape == ( + sample_sizes[i], + self.sub_models[0].num_x_vars, + ) + assert gamma_samples[i].shape == ( + sample_sizes[i], + self.sub_models[0].num_z_vars, + ) y_samples = [] for i in range(self.num_sub_models): sub_beta_samples = beta_samples[i] sub_gamma_samples = gamma_samples[i] if sub_beta_samples.size != 0: - y_samples.append(self.sub_models[i].create_draws( - data, - beta_samples=sub_beta_samples, - gamma_samples=sub_gamma_samples, - random_study=random_study, - sort_by_data_id=sort_by_data_id - )) + y_samples.append( + self.sub_models[i].create_draws( + data, + beta_samples=sub_beta_samples, + gamma_samples=sub_gamma_samples, + random_study=random_study, + sort_by_data_id=sort_by_data_id, + ) + ) 
y_samples = np.hstack(y_samples) return y_samples def summary(self) -> Tuple[pd.DataFrame, pd.DataFrame]: - """Create summary data frame. - """ + """Create summary data frame.""" summary_list = [sub_model.summary() for sub_model in self.sub_models] fe = pd.concat([summary_list[i][0] for i in range(self.num_sub_models)]) fe.loc[self.num_sub_models] = fe.values.T.dot(self.weights) fe.reset_index(inplace=True, drop=True) - fe.insert(0, 'model_id', np.hstack((np.arange(self.num_sub_models), 'average'))) - fe['weights'] = np.hstack((self.weights, np.nan)) + fe.insert( + 0, + "model_id", + np.hstack((np.arange(self.num_sub_models), "average")), + ) + fe["weights"] = np.hstack((self.weights, np.nan)) - re_var = pd.concat([summary_list[i][1] for i in range(self.num_sub_models)]) + re_var = pd.concat( + [summary_list[i][1] for i in range(self.num_sub_models)] + ) re_var.loc[self.num_sub_models] = re_var.values.T.dot(self.weights) re_var.reset_index(inplace=True, drop=True) - re_var.insert(0, 'model_id', np.hstack((np.arange(self.num_sub_models), 'average'))) - re_var['weights'] = np.hstack((self.weights, np.nan)) + re_var.insert( + 0, + "model_id", + np.hstack((np.arange(self.num_sub_models), "average")), + ) + re_var["weights"] = np.hstack((self.weights, np.nan)) return fe, re_var @@ -592,17 +671,17 @@ def summary(self) -> Tuple[pd.DataFrame, pd.DataFrame]: def score_sub_models_datafit(mr: MRBRT): """score the result of mrbert""" if mr.lt.soln is None: - raise ValueError('Must optimize MRBRT first.') + raise ValueError("Must optimize MRBRT first.") return -mr.lt.objective(mr.lt.soln) -def score_sub_models_variation(mr: MRBRT, - ensemble_cov_model_name: str, - n: int = 1) -> float: +def score_sub_models_variation( + mr: MRBRT, ensemble_cov_model_name: str, n: int = 1 +) -> float: """score the result of mrbert""" if mr.lt.soln is None: - raise ValueError('Must optimize MRBRT first.') + raise ValueError("Must optimize MRBRT first.") index = 
mr.get_cov_model_index(ensemble_cov_model_name) cov_model = mr.cov_models[index] diff --git a/src/mrtool/core/plots.py b/src/mrtool/core/plots.py index 6822c51..49bbc7c 100644 --- a/src/mrtool/core/plots.py +++ b/src/mrtool/core/plots.py @@ -6,9 +6,18 @@ from mrtool import MRData -def plot_risk_function(mrbrt, pair, beta_samples, gamma_samples, alt_cov_names=None, - ref_cov_names=None, continuous_variables=[], plot_note=None, plots_dir=None, - write_file=False): +def plot_risk_function( + mrbrt, + pair, + beta_samples, + gamma_samples, + alt_cov_names=None, + ref_cov_names=None, + continuous_variables=[], + plot_note=None, + plots_dir=None, + write_file=False, +): """Plot predicted relative risk. Args: mrbrt (mrtool.MRBRT): @@ -42,12 +51,14 @@ def plot_risk_function(mrbrt, pair, beta_samples, gamma_samples, alt_cov_names=N max_cov = knots[-1] dose_grid = np.linspace(min_cov, max_cov) col_covs = sub.cov_names - pred_df = pd.DataFrame(dict(zip(col_covs, np.zeros(len(col_covs)))), - index=np.arange(len(dose_grid))) + pred_df = pd.DataFrame( + dict(zip(col_covs, np.zeros(len(col_covs)))), + index=np.arange(len(dose_grid)), + ) - alt_cov_names = ['b_0', 'b_1'] if alt_cov_names is None else alt_cov_names - ref_cov_names = ['a_0', 'a_1'] if ref_cov_names is None else ref_cov_names - pred_df['intercept'] = 1 + alt_cov_names = ["b_0", "b_1"] if alt_cov_names is None else alt_cov_names + ref_cov_names = ["a_0", "a_1"] if ref_cov_names is None else ref_cov_names + pred_df["intercept"] = 1 pred_df[alt_cov_names[0]] = dose_grid pred_df[alt_cov_names[1]] = dose_grid pred_df[ref_cov_names[0]] = knots[0] @@ -60,29 +71,42 @@ def plot_risk_function(mrbrt, pair, beta_samples, gamma_samples, alt_cov_names=N pred_data = MRData() pred_data.load_df(pred_df, col_covs=col_covs) - y_draws = mrbrt.create_draws(pred_data, beta_samples, gamma_samples, random_study=True) - y_draws_fe = mrbrt.create_draws(pred_data, beta_samples, gamma_samples, random_study=False) + y_draws = mrbrt.create_draws( 
+ pred_data, beta_samples, gamma_samples, random_study=True + ) + y_draws_fe = mrbrt.create_draws( + pred_data, beta_samples, gamma_samples, random_study=False + ) num_samples = y_draws_fe.shape[1] sort_index = np.argsort(y_draws_fe[-1]) - trimmed_draws = y_draws_fe[:, sort_index[int(num_samples*0.01): -int(num_samples*0.01)]] - patch_index = np.random.choice(trimmed_draws.shape[1], - y_draws_fe.shape[1] - trimmed_draws.shape[1], replace=True) + trimmed_draws = y_draws_fe[ + :, sort_index[int(num_samples * 0.01) : -int(num_samples * 0.01)] + ] + patch_index = np.random.choice( + trimmed_draws.shape[1], + y_draws_fe.shape[1] - trimmed_draws.shape[1], + replace=True, + ) y_draws_fe = np.hstack((trimmed_draws, trimmed_draws[:, patch_index])) y_mean_fe = np.mean(y_draws_fe, axis=1) y_lower_fe = np.percentile(y_draws_fe, 2.5, axis=1) y_upper_fe = np.percentile(y_draws_fe, 97.5, axis=1) - plt.rcParams['axes.edgecolor'] = '0.15' - plt.rcParams['axes.linewidth'] = 0.5 + plt.rcParams["axes.edgecolor"] = "0.15" + plt.rcParams["axes.linewidth"] = 0.5 - plt.plot(dose_grid, np.exp(y_lower_fe), c='gray') - plt.plot(dose_grid, np.exp(y_upper_fe), c='gray') - plt.plot(dose_grid, np.exp(y_mean_fe), c='red') - plt.ylim([np.exp(y_lower_fe).min() - np.exp(y_mean_fe).ptp()*0.1, - np.exp(y_upper_fe).max() + np.exp(y_mean_fe).ptp()*0.1]) - plt.ylabel('RR', fontsize=10) + plt.plot(dose_grid, np.exp(y_lower_fe), c="gray") + plt.plot(dose_grid, np.exp(y_upper_fe), c="gray") + plt.plot(dose_grid, np.exp(y_mean_fe), c="red") + plt.ylim( + [ + np.exp(y_lower_fe).min() - np.exp(y_mean_fe).ptp() * 0.1, + np.exp(y_upper_fe).max() + np.exp(y_mean_fe).ptp() * 0.1, + ] + ) + plt.ylabel("RR", fontsize=10) plt.xlabel("Exposure", fontsize=10) if plot_note is not None: @@ -91,16 +115,23 @@ def plot_risk_function(mrbrt, pair, beta_samples, gamma_samples, alt_cov_names=N # save plot if write_file: assert plots_dir is not None, "plots_dir is not specified!" 
- outfile = os.path.join(plots_dir, f'{pair}_risk_function.pdf') - plt.savefig(outfile, bbox_inches='tight') + outfile = os.path.join(plots_dir, f"{pair}_risk_function.pdf") + plt.savefig(outfile, bbox_inches="tight") print(f"Risk function plot saved at {outfile}") else: plt.show() plt.close() -def plot_derivative_fit(mrbrt, pair, alt_cov_names=None, ref_cov_names=None, - plot_note=None, plots_dir=None, write_file=False): +def plot_derivative_fit( + mrbrt, + pair, + alt_cov_names=None, + ref_cov_names=None, + plot_note=None, + plots_dir=None, + write_file=False, +): """Plot fitted derivative. Args: mrbrt (mrtool.MRBRT): @@ -130,15 +161,19 @@ def plot_derivative_fit(mrbrt, pair, alt_cov_names=None, ref_cov_names=None, sub = mrbrt.sub_models[0] col_covs = sub.cov_models[0].covs # construct dataframe for plotting and derivation of derivative. - pred_df = pd.DataFrame(dict(zip(col_covs, np.zeros(len(col_covs)))), - index=np.arange(len(dose_grid))) - pred_df['intercept'] = 1 + pred_df = pd.DataFrame( + dict(zip(col_covs, np.zeros(len(col_covs)))), + index=np.arange(len(dose_grid)), + ) + pred_df["intercept"] = 1 pred_df[alt_cov_names[0]] = dose_grid pred_df[alt_cov_names[1]] = dose_grid # spline of submodel - spline_list = [sub_model.get_cov_model(mrbrt.ensemble_cov_model_name).spline - for sub_model in mrbrt.sub_models] + spline_list = [ + sub_model.get_cov_model(mrbrt.ensemble_cov_model_name).spline + for sub_model in mrbrt.sub_models + ] # beta solution of each submodel; excluding beta of covariates other than [b0, b1, a0, a1] beta_soln_list = [] @@ -146,12 +181,16 @@ def plot_derivative_fit(mrbrt, pair, alt_cov_names=None, ref_cov_names=None, beta_soln_list.append( sub_model.beta_soln[ sub_model.x_vars_indices[ - sub_model.get_cov_model_index( - mrbrt.ensemble_cov_model_name)]]) + sub_model.get_cov_model_index(mrbrt.ensemble_cov_model_name) + ] + ] + ) # get RR from model - d_log_rr = [get_rr_data(pred_df[alt_cov_names[1]], spline_obj, beta_soln) - for spline_obj, 
beta_soln in zip(spline_list, beta_soln_list)] + d_log_rr = [ + get_rr_data(pred_df[alt_cov_names[1]], spline_obj, beta_soln) + for spline_obj, beta_soln in zip(spline_list, beta_soln_list) + ] alt_covs = mrbrt.data.get_covs(alt_cov_names).T ref_covs = mrbrt.data.get_covs(ref_cov_names).T @@ -165,8 +204,13 @@ def plot_derivative_fit(mrbrt, pair, alt_cov_names=None, ref_cov_names=None, ln_effect_unit = mrbrt.data.obs / (alt_mid - ref_mid) # weight of each submodel multiplied by inlier/outlier weight of submodel - w = np.sum([sub_mr.lt.w * weight for sub_mr, weight - in zip(mrbrt.sub_models, mrbrt.weights)], axis=0) + w = np.sum( + [ + sub_mr.lt.w * weight + for sub_mr, weight in zip(mrbrt.sub_models, mrbrt.weights) + ], + axis=0, + ) inliers = w > 0.6 # size @@ -174,32 +218,59 @@ def plot_derivative_fit(mrbrt, pair, alt_cov_names=None, ref_cov_names=None, pt_size = pt_size * (300 / pt_size.max()) plt.figure(figsize=(12, 10)) - plt.plot(ref_covs, np.array([ln_effect_unit, ln_effect_unit]), color='red', alpha=0.15) - plt.plot(alt_covs, np.array([ln_effect_unit, ln_effect_unit]), color='blue', alpha=0.15) - plt.plot(np.array([ref_covs[1, :], alt_covs[1, :]]), - np.array([ln_effect_unit, ln_effect_unit]), color='grey', alpha=0.15) + plt.plot( + ref_covs, + np.array([ln_effect_unit, ln_effect_unit]), + color="red", + alpha=0.15, + ) + plt.plot( + alt_covs, + np.array([ln_effect_unit, ln_effect_unit]), + color="blue", + alpha=0.15, + ) + plt.plot( + np.array([ref_covs[1, :], alt_covs[1, :]]), + np.array([ln_effect_unit, ln_effect_unit]), + color="grey", + alpha=0.15, + ) # plot slope for each submodel for slope in d_log_rr: - plt.plot(dose_grid, slope, color='green', alpha=0.15) + plt.plot(dose_grid, slope, color="green", alpha=0.15) - plt.ylabel('Slope of ln(RR)', fontsize=10) - plt.xlabel('Exposure', fontsize=10) + plt.ylabel("Slope of ln(RR)", fontsize=10) + plt.xlabel("Exposure", fontsize=10) plt.tick_params(labelsize=8) # scatterplot of empirical slope of each point; 
inlier/outlier specified; # point side is proportional to obs_se squared. - plt.scatter(effect_mid[inliers], ln_effect_unit[inliers], s=pt_size[inliers], - c='grey', edgecolors='black', marker='o', alpha=0.75) - plt.scatter(effect_mid[~inliers], ln_effect_unit[~inliers], s=pt_size[~inliers], - c='black', marker='x', alpha=0.75) - plt.axhline(0, xmin=min_cov, xmax=max_cov, linestyle='--', c='grey') + plt.scatter( + effect_mid[inliers], + ln_effect_unit[inliers], + s=pt_size[inliers], + c="grey", + edgecolors="black", + marker="o", + alpha=0.75, + ) + plt.scatter( + effect_mid[~inliers], + ln_effect_unit[~inliers], + s=pt_size[~inliers], + c="black", + marker="x", + alpha=0.75, + ) + plt.axhline(0, xmin=min_cov, xmax=max_cov, linestyle="--", c="grey") if plot_note is not None: plt.title(plot_note) # save plot if write_file: assert plots_dir is not None, "plots_dir is not specified!" - outfile = os.path.join(plots_dir, f'{pair}_derivative_fit.pdf') - plt.savefig(outfile, bbox_inches='tight') + outfile = os.path.join(plots_dir, f"{pair}_derivative_fit.pdf") + plt.savefig(outfile, bbox_inches="tight") print(f"Derivative fit plot saved at {outfile}") else: plt.show() diff --git a/src/mrtool/core/utils.py b/src/mrtool/core/utils.py index c234db9..d55b5df 100644 --- a/src/mrtool/core/utils.py +++ b/src/mrtool/core/utils.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- """ - utils - ~~~~~ - `utils` module of the `mrtool` package. +utils +~~~~~ +`utils` module of the `mrtool` package. 
""" + from typing import Union, List, Any, Tuple import numpy as np import pandas as pd @@ -22,8 +23,7 @@ def get_cols(df, cols): """ assert isinstance(df, pd.DataFrame) if isinstance(cols, list): - assert all([isinstance(col, str) and col in df - for col in cols]) + assert all([isinstance(col, str) and col in df for col in cols]) else: assert (cols is None) or (isinstance(cols, str) and cols in df) @@ -44,8 +44,7 @@ def is_cols(cols): """ ok = isinstance(cols, (str, list)) or cols is None if isinstance(cols, list): - ok = ok and all([isinstance(col, str) - for col in cols]) + ok = ok and all([isinstance(col, str) for col in cols]) return ok @@ -147,8 +146,10 @@ def is_gaussian_prior(prior, size=None): ok = ok and np.all(prior[1] > 0.0) return ok + is_laplace_prior = is_gaussian_prior + def is_uniform_prior(prior, size=None): """Check if variable satisfy uniform prior format @@ -194,15 +195,17 @@ def input_gaussian_prior(prior, size): """ assert is_gaussian_prior(prior) if prior is None or prior.size == 0: - return np.array([[0.0]*size, [np.inf]*size]) + return np.array([[0.0] * size, [np.inf] * size]) elif prior.ndim == 1: return np.repeat(prior[:, None], size, axis=1) else: assert prior.shape[1] == size return prior + input_laplace_prior = input_gaussian_prior + def input_uniform_prior(prior, size): """Process the input Gaussian prior @@ -220,7 +223,7 @@ def input_uniform_prior(prior, size): """ assert is_uniform_prior(prior) if prior is None or prior.size == 0: - return np.array([[-np.inf]*size, [np.inf]*size]) + return np.array([[-np.inf] * size, [np.inf] * size]) elif prior.ndim == 1: return np.repeat(prior[:, None], size, axis=1) else: @@ -252,8 +255,13 @@ def avg_integral(mat, spline=None, use_spline_intercept=False): index = 0 if use_spline_intercept else 1 if mat.shape[1] == 1: - return mat if spline is None else spline.design_mat( - mat.ravel(), l_extra=True, r_extra=True)[:, index:] + return ( + mat + if spline is None + else 
spline.design_mat(mat.ravel(), l_extra=True, r_extra=True)[ + :, index: + ] + ) else: if spline is None: return mat.mean(axis=1)[:, None] @@ -261,28 +269,33 @@ def avg_integral(mat, spline=None, use_spline_intercept=False): x0 = mat[:, 0] x1 = mat[:, 1] dx = x1 - x0 - val_idx = (dx == 0.0) - int_idx = (dx != 0.0) + val_idx = dx == 0.0 + int_idx = dx != 0.0 mat = np.zeros((dx.size, spline.num_spline_bases)) if np.any(val_idx): - mat[val_idx, :] = spline.design_mat(x0[val_idx], - l_extra=True, - r_extra=True) + mat[val_idx, :] = spline.design_mat( + x0[val_idx], l_extra=True, r_extra=True + ) if np.any(int_idx): - mat[int_idx, :] = spline.design_imat( - x0[int_idx], x1[int_idx], 1, - l_extra=True, - r_extra=True)/(dx[int_idx][:, None]) + mat[int_idx, :] = ( + spline.design_imat( + x0[int_idx], x1[int_idx], 1, l_extra=True, r_extra=True + ) + / (dx[int_idx][:, None]) + ) return mat[:, index:] # random knots -def sample_knots(num_knots: int, knot_bounds: np.ndarray, - min_dist: Union[float, np.ndarray], - num_samples: int = 1) -> np.ndarray: +def sample_knots( + num_knots: int, + knot_bounds: np.ndarray, + min_dist: Union[float, np.ndarray], + num_samples: int = 1, +) -> np.ndarray: """Sample knot vectors given a set of rules. 
Parameters @@ -306,12 +319,13 @@ def sample_knots(num_knots: int, knot_bounds: np.ndarray, """ # Check input - _check_nums('num_knots', num_knots) - _check_nums('num_samples', num_samples) + _check_nums("num_knots", num_knots) + _check_nums("num_samples", num_samples) knot_bounds = _check_knot_bounds(num_knots, knot_bounds) min_dist = _check_min_dist(num_knots, min_dist) - left_bounds, right_bounds = _check_feasibility(num_knots, knot_bounds, - min_dist) + left_bounds, right_bounds = _check_feasibility( + num_knots, knot_bounds, min_dist + ) # Sample knots knots = np.zeros((num_samples, num_knots + 2)) @@ -320,7 +334,7 @@ def sample_knots(num_knots: int, knot_bounds: np.ndarray, for ii in range(num_knots): left_bound = np.maximum(left_bounds[ii], knots[:, ii] + min_dist[ii]) if np.any(left_bound > right_bounds[ii]): - raise ValueError('empty sampling interval') + raise ValueError("empty sampling interval") knots[:, ii + 1] = np.random.uniform(left_bound, right_bounds[ii]) return knots @@ -338,53 +352,57 @@ def _check_knot_bounds(num_knots: int, knot_bounds: np.ndarray) -> np.ndarray: try: knot_bounds = np.asarray(knot_bounds, dtype=float) except Exception as error: - raise TypeError('knot_bounds must be an array') from error + raise TypeError("knot_bounds must be an array") from error if knot_bounds.shape != (num_knots, 2): if knot_bounds.shape == (2,): knot_bounds = np.tile(knot_bounds, (num_knots, 1)) else: - msg = 'knot_bounds must have shape(2,) or (num_knots,2)' + msg = "knot_bounds must have shape(2,) or (num_knots,2)" raise ValueError(msg) - bounds_sorted = np.all(np.diff(knot_bounds, axis=1) > 0.) - neighbors_sorted = np.all(np.diff(knot_bounds, axis=0) >= 0.) 
+ bounds_sorted = np.all(np.diff(knot_bounds, axis=1) > 0.0) + neighbors_sorted = np.all(np.diff(knot_bounds, axis=0) >= 0.0) if not (bounds_sorted and neighbors_sorted): - raise ValueError('knot_bounds must be sorted') + raise ValueError("knot_bounds must be sorted") return knot_bounds -def _check_min_dist(num_knots: int, - min_dist: Union[float, np.ndarray]) -> np.ndarray: +def _check_min_dist( + num_knots: int, min_dist: Union[float, np.ndarray] +) -> np.ndarray: """Check knot min_dist.""" if np.isscalar(min_dist): min_dist = np.tile(min_dist, num_knots + 1) try: min_dist = np.asarray(min_dist, dtype=float) except Exception as error: - raise TypeError('min_dist must be a float or array') from error + raise TypeError("min_dist must be a float or array") from error if min_dist.shape != (num_knots + 1,): - raise ValueError('min_dist must have shape(num_knots+1,)') - if np.any(min_dist < 0.): - raise ValueError('min_dist must be positive') + raise ValueError("min_dist must have shape(num_knots+1,)") + if np.any(min_dist < 0.0): + raise ValueError("min_dist must be positive") return min_dist -def _check_feasibility(num_knots: int, knot_bounds: np.ndarray, - min_dist: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: +def _check_feasibility( + num_knots: int, knot_bounds: np.ndarray, min_dist: np.ndarray +) -> Tuple[np.ndarray, np.ndarray]: """Check knot feasibility and get left and right boundaries.""" if np.sum(min_dist) > knot_bounds[-1, 1] - knot_bounds[0, 0]: - raise ValueError('min_dist cannot exceed knot_bounds') + raise ValueError("min_dist cannot exceed knot_bounds") left_bounds = np.zeros(num_knots) left_bounds[0] = knot_bounds[0, 0] + min_dist[0] for ii in range(1, num_knots): - left_bounds[ii] = np.maximum(knot_bounds[ii, 0], - left_bounds[ii - 1] + min_dist[ii]) + left_bounds[ii] = np.maximum( + knot_bounds[ii, 0], left_bounds[ii - 1] + min_dist[ii] + ) right_bounds = np.zeros(num_knots) right_bounds[-1] = knot_bounds[-1, 1] - min_dist[-1] for ii in range(-2, 
-(num_knots + 1), -1): - right_bounds[ii] = np.minimum(knot_bounds[ii, 1], - right_bounds[ii + 1] - min_dist[ii]) + right_bounds[ii] = np.minimum( + knot_bounds[ii, 1], right_bounds[ii + 1] - min_dist[ii] + ) if np.any(left_bounds > right_bounds): - raise ValueError('knot_bounds and min_dist not feasible') + raise ValueError("knot_bounds and min_dist not feasible") return left_bounds, right_bounds @@ -394,18 +412,18 @@ def nonlinear_trans(score, slope=6.0, quantile=0.7): if score_max == score_min: return np.ones(len(score)) else: - weight = (score - score_min)/(score_max - score_min) + weight = (score - score_min) / (score_max - score_min) sorted_weight = np.sort(weight) - x = sorted_weight[int(0.8*weight.size)] + x = sorted_weight[int(0.8 * weight.size)] y = 1.0 - x # calculate the transformation coefficient c = np.zeros(4) - c[1] = slope*x**2/quantile - c[0] = quantile*np.exp(c[1]/x) - c[3] = slope*y**2/(1.0 - quantile) - c[2] = (1.0 - quantile)*np.exp(c[3]/y) + c[1] = slope * x**2 / quantile + c[0] = quantile * np.exp(c[1] / x) + c[3] = slope * y**2 / (1.0 - quantile) + c[2] = (1.0 - quantile) * np.exp(c[3] / y) weight_trans = np.zeros(weight.size) @@ -414,14 +432,15 @@ def nonlinear_trans(score, slope=6.0, quantile=0.7): if w == 0.0: weight_trans[i] = 0.0 elif w < x: - weight_trans[i] = c[0]*np.exp(-c[1]/w) + weight_trans[i] = c[0] * np.exp(-c[1] / w) elif w < 1.0: - weight_trans[i] = 1.0 - c[2]*np.exp(-c[3]/(1.0 - w)) + weight_trans[i] = 1.0 - c[2] * np.exp(-c[3] / (1.0 - w)) else: weight_trans[i] = 1.0 - weight_trans = (weight_trans - np.min(weight_trans))/\ - (np.max(weight_trans) - np.min(weight_trans)) + weight_trans = (weight_trans - np.min(weight_trans)) / ( + np.max(weight_trans) - np.min(weight_trans) + ) return weight_trans @@ -441,6 +460,7 @@ def mat_to_fun(alt_mat, ref_mat=None): mat = alt_mat else: mat = alt_mat - ref_mat + def fun(x, mat=mat): return mat.dot(x) @@ -449,6 +469,7 @@ def jac_fun(x, mat=mat): return fun, jac_fun + def 
mat_to_log_fun(alt_mat, ref_mat=None, add_one=True): alt_mat = np.array(alt_mat) shift = 1.0 if add_one else 0.0 @@ -462,21 +483,24 @@ def mat_to_log_fun(alt_mat, ref_mat=None, add_one=True): jac_fun = None else: if ref_mat is None or ref_mat.size == 0: + def fun(beta): alt_val = np.abs(shift + alt_mat.dot(beta)) return np.log(alt_val) def jac_fun(beta): - return alt_mat/(shift + alt_mat.dot(beta)[:, None]) + return alt_mat / (shift + alt_mat.dot(beta)[:, None]) else: + def fun(beta): alt_val = np.abs(shift + alt_mat.dot(beta)) ref_val = np.abs(shift + ref_mat.dot(beta)) return np.log(alt_val) - np.log(ref_val) def jac_fun(beta): - return alt_mat/(shift + alt_mat.dot(beta)[:, None]) - \ - ref_mat/(shift + ref_mat.dot(beta)[:, None]) + return alt_mat / ( + shift + alt_mat.dot(beta)[:, None] + ) - ref_mat / (shift + ref_mat.dot(beta)[:, None]) return fun, jac_fun @@ -484,6 +508,7 @@ def jac_fun(beta): def empty_array(): return np.array(list()) + def to_list(obj: Any) -> List[Any]: """Convert objective to list of object. @@ -510,11 +535,13 @@ def is_numeric_array(array: np.ndarray) -> bool: Returns: bool: True if the array is numeric. """ - numerical_dtype_kinds = {'b', # boolean - 'u', # unsigned integer - 'i', # signed integer - 'f', # floats - 'c'} # complex + numerical_dtype_kinds = { + "b", # boolean + "u", # unsigned integer + "i", # signed integer + "f", # floats + "c", + } # complex try: return array.dtype.kind in numerical_dtype_kinds except AttributeError: @@ -522,10 +549,9 @@ def is_numeric_array(array: np.ndarray) -> bool: return np.asarray(array).dtype.kind in numerical_dtype_kinds -def expand_array(array: np.ndarray, - shape: Tuple[int], - value: Any, - name: str) -> np.ndarray: +def expand_array( + array: np.ndarray, shape: Tuple[int], value: Any, name: str +) -> np.ndarray: """Expand array when it is empty. 
Args: @@ -541,9 +567,11 @@ def expand_array(array: np.ndarray, """ array = np.array(array) if len(array) == 0: - if hasattr(value, '__iter__') and not isinstance(value, str): + if hasattr(value, "__iter__") and not isinstance(value, str): value = np.array(value) - assert value.shape == shape, f"{name}, alternative value inconsistent shape." + assert ( + value.shape == shape + ), f"{name}, alternative value inconsistent shape." array = value else: array = np.full(shape, value) @@ -553,8 +581,7 @@ def expand_array(array: np.ndarray, def ravel_dict(x: dict) -> dict: - """Ravel dictionary. - """ + """Ravel dictionary.""" assert all([isinstance(k, str) for k in x.keys()]) assert all([isinstance(v, np.ndarray) for v in x.values()]) new_x = {} @@ -563,5 +590,5 @@ def ravel_dict(x: dict) -> dict: new_x[k] = v else: for i in range(v.size): - new_x[f'{k}_{i}'] = v[i] + new_x[f"{k}_{i}"] = v[i] return new_x diff --git a/src/mrtool/cov_selection/covfinder.py b/src/mrtool/cov_selection/covfinder.py index a4b0eef..184fb21 100644 --- a/src/mrtool/cov_selection/covfinder.py +++ b/src/mrtool/cov_selection/covfinder.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- """ - Cov Finder - ~~~~~~~~~~ +Cov Finder +~~~~~~~~~~ """ + import warnings from copy import deepcopy from typing import Dict, List, Tuple, Union @@ -13,26 +14,28 @@ class CovFinder: - """Class in charge of the covariate selection. 
- """ + """Class in charge of the covariate selection.""" + zero_gamma_uprior = np.array([0.0, 0.0]) loose_gamma_uprior = np.array([1.0, 1.0]) - def __init__(self, - data: MRData, - covs: List[str], - pre_selected_covs: Union[List[str], None] = None, - normalized_covs: bool = True, - num_samples: int = 1000, - laplace_threshold: float = 1e-5, - power_range: Tuple[float, float] = (-8, 8), - power_step_size: float = 0.5, - inlier_pct: float = 1.0, - alpha: float = 0.05, - beta_gprior: Dict[str, np.ndarray] = None, - beta_gprior_std: float = 1.0, - bias_zero: bool = False, - use_re: Union[Dict, None] = None): + def __init__( + self, + data: MRData, + covs: List[str], + pre_selected_covs: Union[List[str], None] = None, + normalized_covs: bool = True, + num_samples: int = 1000, + laplace_threshold: float = 1e-5, + power_range: Tuple[float, float] = (-8, 8), + power_step_size: float = 0.5, + inlier_pct: float = 1.0, + alpha: float = 0.05, + beta_gprior: Dict[str, np.ndarray] = None, + beta_gprior_std: float = 1.0, + bias_zero: bool = False, + use_re: Union[Dict, None] = None, + ): """Covariate Finder. Args: @@ -63,13 +66,18 @@ def __init__(self, self.data = data self.covs = covs - self.pre_selected_covs = [] if pre_selected_covs is None else pre_selected_covs - assert len(set(self.pre_selected_covs) & set(self.covs)) == 0, \ - "covs and pre_selected_covs should be mutually exclusive." + self.pre_selected_covs = ( + [] if pre_selected_covs is None else pre_selected_covs + ) + assert ( + len(set(self.pre_selected_covs) & set(self.covs)) == 0 + ), "covs and pre_selected_covs should be mutually exclusive." self.normalize_covs = normalized_covs self.inlier_pct = inlier_pct self.beta_gprior_std = beta_gprior_std - assert self.beta_gprior_std > 0.0, f"beta_gprior_std={self.beta_gprior_std} has to be positive." + assert ( + self.beta_gprior_std > 0.0 + ), f"beta_gprior_std={self.beta_gprior_std} has to be positive." 
self.bias_zero = bias_zero self.alpha = alpha if self.normalize_covs: @@ -92,13 +100,17 @@ def __init__(self, self.num_covs = len(pre_selected_covs) + len(covs) if len(covs) == 0: - warnings.warn("There is no covariates to select, will return the pre-selected covariates.") + warnings.warn( + "There is no covariates to select, will return the pre-selected covariates." + ) self.stop = True - def create_model(self, - covs: List[str], - prior_type: str = 'Laplace', - laplace_std: float = None) -> MRBRT: + def create_model( + self, + covs: List[str], + prior_type: str = "Laplace", + laplace_std: float = None, + ) -> MRBRT: """Create Gaussian or Laplace model. Args: @@ -109,31 +121,49 @@ def create_model(self, Return: MRBRT: Created model object. """ - assert prior_type in ['Laplace', 'Gaussian'], "Prior type can only 'Laplace' or 'Gaussian'." - if prior_type == 'Laplace': - assert laplace_std is not None, "Use Laplace prior must provide standard deviation." - - if prior_type == 'Laplace': + assert prior_type in [ + "Laplace", + "Gaussian", + ], "Prior type can only 'Laplace' or 'Gaussian'." + if prior_type == "Laplace": + assert ( + laplace_std is not None + ), "Use Laplace prior must provide standard deviation." 
+ + if prior_type == "Laplace": cov_models = [ - LinearCovModel(cov, use_re=True, - prior_beta_laplace=np.array([0.0, laplace_std]) - if cov not in self.selected_covs else None, - prior_beta_gaussian=None - if cov not in self.selected_covs else self.beta_gprior[cov], - prior_gamma_uniform=self.loose_gamma_uprior if self.use_re[cov] else - self.zero_gamma_uprior) + LinearCovModel( + cov, + use_re=True, + prior_beta_laplace=np.array([0.0, laplace_std]) + if cov not in self.selected_covs + else None, + prior_beta_gaussian=None + if cov not in self.selected_covs + else self.beta_gprior[cov], + prior_gamma_uniform=self.loose_gamma_uprior + if self.use_re[cov] + else self.zero_gamma_uprior, + ) for cov in covs ] else: cov_models = [ - LinearCovModel(cov, use_re=True, - prior_beta_gaussian=None - if cov not in self.beta_gprior else self.beta_gprior[cov], - prior_gamma_uniform=self.loose_gamma_uprior if self.use_re[cov] else - self.zero_gamma_uprior) + LinearCovModel( + cov, + use_re=True, + prior_beta_gaussian=None + if cov not in self.beta_gprior + else self.beta_gprior[cov], + prior_gamma_uniform=self.loose_gamma_uprior + if self.use_re[cov] + else self.zero_gamma_uprior, + ) for cov in covs ] - model = MRBRT(self.data, cov_models=cov_models, inlier_pct=self.inlier_pct) + model = MRBRT( + self.data, cov_models=cov_models, inlier_pct=self.inlier_pct + ) return model def fit_gaussian_model(self, covs: List[str]) -> MRBRT: @@ -145,7 +175,7 @@ def fit_gaussian_model(self, covs: List[str]) -> MRBRT: Returns: MRBRT: the fitted model object. """ - gaussian_model = self.create_model(covs, prior_type='Gaussian') + gaussian_model = self.create_model(covs, prior_type="Gaussian") gaussian_model.fit_model(x0=np.zeros(gaussian_model.num_vars)) empirical_gamma = np.var(gaussian_model.u_soln, axis=0) gaussian_model.gamma_soln = empirical_gamma @@ -162,13 +192,17 @@ def fit_laplace_model(self, covs: List[str], laplace_std: float) -> MRBRT: Returns: MRBRT: the fitted model object. 
""" - laplace_model = self.create_model(covs, prior_type='Laplace', laplace_std=laplace_std) + laplace_model = self.create_model( + covs, prior_type="Laplace", laplace_std=laplace_std + ) lprior = laplace_model.create_lprior() scale = 1 if np.isinf(lprior[1]).all() else 2 - laplace_model.fit_model(x0=np.zeros(scale*laplace_model.num_vars)) + laplace_model.fit_model(x0=np.zeros(scale * laplace_model.num_vars)) return laplace_model - def summary_gaussian_model(self, gaussian_model: MRBRT) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + def summary_gaussian_model( + self, gaussian_model: MRBRT + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """Summary the gaussian model. Return the mean standard deviation and the significance indicator of beta. @@ -182,12 +216,13 @@ def summary_gaussian_model(self, gaussian_model: MRBRT) -> Tuple[np.ndarray, np. beta_samples = gaussian_model.lt.sample_beta(size=self.num_samples) beta_mean = gaussian_model.beta_soln beta_std = np.std(beta_samples, axis=0) - beta_sig = self.is_significance(beta_samples, var_type='beta', alpha=self.alpha) + beta_sig = self.is_significance( + beta_samples, var_type="beta", alpha=self.alpha + ) return beta_mean, beta_std, beta_sig def fit_pre_selected_covs(self): - """Fit the pre-selected covariates. 
- """ + """Fit the pre-selected covariates.""" if len(self.pre_selected_covs) != 0: gaussian_model = self.fit_gaussian_model(self.pre_selected_covs) beta_mean, beta_std, _ = self.summary_gaussian_model(gaussian_model) @@ -199,33 +234,42 @@ def select_covs_by_laplace(self, laplace_std: float, verbose: bool = False): laplace_model = self.fit_laplace_model(self.all_covs, laplace_std) additional_covs = [] for i, cov in enumerate(self.all_covs): - if np.abs(laplace_model.beta_soln[i]) > self.laplace_threshold and cov not in self.selected_covs: + if ( + np.abs(laplace_model.beta_soln[i]) > self.laplace_threshold + and cov not in self.selected_covs + ): additional_covs.append(cov) if verbose: - print(f'Laplace std: {laplace_std}') - print(' potential additional covariates', additional_covs) + print(f"Laplace std: {laplace_std}") + print(" potential additional covariates", additional_covs) if len(additional_covs) > 0: candidate_covs = self.selected_covs + additional_covs gaussian_model = self.fit_gaussian_model(candidate_covs) - beta_soln_mean, beta_soln_std, beta_soln_sig = self.summary_gaussian_model(gaussian_model) + beta_soln_mean, beta_soln_std, beta_soln_sig = ( + self.summary_gaussian_model(gaussian_model) + ) if verbose: - print(' Gaussian model fe_mean:', beta_soln_mean) - print(' Gaussiam model fe_std: ', beta_soln_std) - print(' Gaussian model re_var: ', gaussian_model.gamma_soln) - print(' significance:', beta_soln_sig) + print(" Gaussian model fe_mean:", beta_soln_mean) + print(" Gaussiam model fe_std: ", beta_soln_std) + print(" Gaussian model re_var: ", gaussian_model.gamma_soln) + print(" significance:", beta_soln_sig) # update the selected covs i_start = len(self.selected_covs) i_end = len(candidate_covs) for i in range(i_start, i_end): if beta_soln_sig[i]: self.selected_covs.append(candidate_covs[i]) - self.update_beta_gprior([candidate_covs[i]], - np.array([0.0 if self.bias_zero else beta_soln_mean[i]]), - np.array([self.beta_gprior_std])) + 
self.update_beta_gprior( + [candidate_covs[i]], + np.array( + [0.0 if self.bias_zero else beta_soln_mean[i]] + ), + np.array([self.beta_gprior_std]), + ) if verbose: - print(' selected covariates:', self.selected_covs) + print(" selected covariates:", self.selected_covs) # update the stop self.stop = not all(beta_soln_sig) @@ -233,7 +277,9 @@ def select_covs_by_laplace(self, laplace_std: float, verbose: bool = False): if len(self.selected_covs) == self.num_covs: self.stop = True - def update_beta_gprior(self, covs: List[str], mean: np.ndarray, std: np.ndarray): + def update_beta_gprior( + self, covs: List[str], mean: np.ndarray, std: np.ndarray + ): """Update the beta Gaussian prior. Args: @@ -243,7 +289,9 @@ def update_beta_gprior(self, covs: List[str], mean: np.ndarray, std: np.ndarray) """ for i, cov in enumerate(covs): if cov not in self.all_covs: - raise ValueError(f"Unrecognized covaraite {cov} for beta Gaussian prior update.") + raise ValueError( + f"Unrecognized covaraite {cov} for beta Gaussian prior update." + ) if cov not in self.beta_gprior: self.beta_gprior[cov] = np.array([mean[i], std[i]]) @@ -257,12 +305,16 @@ def select_covs(self, verbose: bool = False): self.stop = True @staticmethod - def is_significance(var_samples: np.ndarray, - var_type: str = 'beta', - alpha: float = 0.05) -> np.ndarray: - assert var_type == 'beta', "Only support variable type beta." - assert 0.0 < alpha < 1.0, "Significance threshold has to be between 0 and 1." - var_uis = np.quantile(var_samples, (0.5*alpha, 1 - 0.5*alpha), axis=0) + def is_significance( + var_samples: np.ndarray, var_type: str = "beta", alpha: float = 0.05 + ) -> np.ndarray: + assert var_type == "beta", "Only support variable type beta." + assert ( + 0.0 < alpha < 1.0 + ), "Significance threshold has to be between 0 and 1." 
+ var_uis = np.quantile( + var_samples, (0.5 * alpha, 1 - 0.5 * alpha), axis=0 + ) var_sig = var_uis.prod(axis=0) > 0 return var_sig diff --git a/tests/test_covmodel.py b/tests/test_covmodel.py index f77817b..8bf2af1 100644 --- a/tests/test_covmodel.py +++ b/tests/test_covmodel.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- """ - Test covariate model +Test covariate model """ + import numpy as np import pandas as pd import pytest @@ -15,24 +16,28 @@ @pytest.fixture def mrdata(seed=123): np.random.seed(seed) - data = pd.DataFrame({ - 'obs': np.random.randn(10), - 'obs_se': np.full(10, 0.1), - 'cov0': np.ones(10), - 'cov1': np.random.randn(10), - 'study_id': np.random.choice(range(3), 10) - }) + data = pd.DataFrame( + { + "obs": np.random.randn(10), + "obs_se": np.full(10, 0.1), + "cov0": np.ones(10), + "cov1": np.random.randn(10), + "study_id": np.random.choice(range(3), 10), + } + ) mrdata = MRData() - mrdata.load_df(data, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1'], - col_study_id='study_id') + mrdata.load_df( + data, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1"], + col_study_id="study_id", + ) return mrdata def test_covmodel_default(): - covmodel = CovModel('cov0') + covmodel = CovModel("cov0") # direct priors assert all(covmodel.prior_beta_gaussian == DUMMY_GPRIOR) assert all(covmodel.prior_beta_laplace == DUMMY_LPRIOR) @@ -61,9 +66,9 @@ def test_covmodel_default(): assert covmodel.num_regularizations == 0 -@pytest.mark.parametrize('use_re', [True, False]) +@pytest.mark.parametrize("use_re", [True, False]) def test_covmodel_re(use_re): - covmodel = CovModel('cov0', use_re=use_re) + covmodel = CovModel("cov0", use_re=use_re) assert covmodel.num_x_vars == 1 if use_re: assert covmodel.num_z_vars == 1 @@ -77,27 +82,43 @@ def test_covmodel_re(use_re): assert covmodel.prior_gamma_uniform.size == 0 -@pytest.mark.parametrize('spline_knots', [np.array([0.0, 0.25, 0.75, 1.0]), - np.array([0.0, 0.25, 0.75]), - np.array([0.25, 0.75, 
1.0]), - np.array([0.25, 0.75])]) -@pytest.mark.parametrize('spline_degree', [2, 3]) -@pytest.mark.parametrize('spline_l_linear', [True, False]) -@pytest.mark.parametrize('spline_r_linear', [True, False]) -@pytest.mark.parametrize('use_re', [True]) -@pytest.mark.parametrize('use_re_mid_point', [True, False]) -def test_covmodel_spline(spline_knots, spline_degree, spline_l_linear, spline_r_linear, - use_re, use_re_mid_point, mrdata): - covmodel = CovModel('cov1', - use_re=use_re, - use_re_mid_point=use_re_mid_point, - use_spline=True, - spline_knots=spline_knots, - spline_degree=spline_degree, - spline_l_linear=spline_l_linear, - spline_r_linear=spline_r_linear) - - assert all(covmodel.spline_knots_template == np.array([0.0, 0.25, 0.75, 1.0])) +@pytest.mark.parametrize( + "spline_knots", + [ + np.array([0.0, 0.25, 0.75, 1.0]), + np.array([0.0, 0.25, 0.75]), + np.array([0.25, 0.75, 1.0]), + np.array([0.25, 0.75]), + ], +) +@pytest.mark.parametrize("spline_degree", [2, 3]) +@pytest.mark.parametrize("spline_l_linear", [True, False]) +@pytest.mark.parametrize("spline_r_linear", [True, False]) +@pytest.mark.parametrize("use_re", [True]) +@pytest.mark.parametrize("use_re_mid_point", [True, False]) +def test_covmodel_spline( + spline_knots, + spline_degree, + spline_l_linear, + spline_r_linear, + use_re, + use_re_mid_point, + mrdata, +): + covmodel = CovModel( + "cov1", + use_re=use_re, + use_re_mid_point=use_re_mid_point, + use_spline=True, + spline_knots=spline_knots, + spline_degree=spline_degree, + spline_l_linear=spline_l_linear, + spline_r_linear=spline_r_linear, + ) + + assert all( + covmodel.spline_knots_template == np.array([0.0, 0.25, 0.75, 1.0]) + ) assert covmodel.spline_degree == spline_degree assert covmodel.spline_l_linear == spline_l_linear assert covmodel.spline_r_linear == spline_r_linear @@ -112,127 +133,171 @@ def test_covmodel_spline(spline_knots, spline_degree, spline_l_linear, spline_r_ assert covmodel.num_z_vars == 0 
-@pytest.mark.parametrize('prior_beta_gaussian', [np.array([0.0, 1.0])]) -@pytest.mark.parametrize('use_spline', [True, False]) +@pytest.mark.parametrize("prior_beta_gaussian", [np.array([0.0, 1.0])]) +@pytest.mark.parametrize("use_spline", [True, False]) def test_covmodel_beta_gprior(prior_beta_gaussian, use_spline, mrdata): - covmodel = CovModel('cov1', use_spline=use_spline, - prior_beta_gaussian=prior_beta_gaussian) + covmodel = CovModel( + "cov1", use_spline=use_spline, prior_beta_gaussian=prior_beta_gaussian + ) covmodel.attach_data(mrdata) assert covmodel.prior_beta_gaussian.ndim == 2 assert np.all(covmodel.prior_beta_gaussian[0] == 0.0) assert np.all(covmodel.prior_beta_gaussian[1] == 1.0) -@pytest.mark.parametrize('prior_beta_laplace', [np.array([0.0, 1.0])]) -@pytest.mark.parametrize('use_spline', [True, False]) +@pytest.mark.parametrize("prior_beta_laplace", [np.array([0.0, 1.0])]) +@pytest.mark.parametrize("use_spline", [True, False]) def test_covmodel_beta_lprior(prior_beta_laplace, use_spline, mrdata): - covmodel = CovModel('cov1', use_spline=use_spline, - prior_beta_laplace=prior_beta_laplace) + covmodel = CovModel( + "cov1", use_spline=use_spline, prior_beta_laplace=prior_beta_laplace + ) covmodel.attach_data(mrdata) assert covmodel.prior_beta_laplace.ndim == 2 assert np.all(covmodel.prior_beta_laplace[0] == 0.0) assert np.all(covmodel.prior_beta_laplace[1] == 1.0) -@pytest.mark.parametrize('prior_beta_uniform', [np.array([0.0, 1.0])]) -@pytest.mark.parametrize('use_spline', [True, False]) +@pytest.mark.parametrize("prior_beta_uniform", [np.array([0.0, 1.0])]) +@pytest.mark.parametrize("use_spline", [True, False]) def test_covmodel_beta_uprior(prior_beta_uniform, use_spline, mrdata): - covmodel = CovModel('cov1', use_spline=use_spline, - prior_beta_uniform=prior_beta_uniform) + covmodel = CovModel( + "cov1", use_spline=use_spline, prior_beta_uniform=prior_beta_uniform + ) covmodel.attach_data(mrdata) assert covmodel.prior_beta_uniform.ndim == 2 
assert np.all(covmodel.prior_beta_uniform[0] == 0.0) assert np.all(covmodel.prior_beta_uniform[1] == 1.0) -@pytest.mark.parametrize('prior_gamma_gaussian', [np.array([0.0, 1.0])]) -@pytest.mark.parametrize('use_re', [True]) -@pytest.mark.parametrize('use_spline', [True, False]) -@pytest.mark.parametrize('use_re_mid_point', [True, False]) -def test_covmodel_gamma_gprior(prior_gamma_gaussian, use_spline, use_re, use_re_mid_point, mrdata): - covmodel = CovModel('cov1', use_spline=True, - prior_gamma_gaussian=prior_gamma_gaussian, - use_re=use_re, use_re_mid_point=use_re_mid_point) +@pytest.mark.parametrize("prior_gamma_gaussian", [np.array([0.0, 1.0])]) +@pytest.mark.parametrize("use_re", [True]) +@pytest.mark.parametrize("use_spline", [True, False]) +@pytest.mark.parametrize("use_re_mid_point", [True, False]) +def test_covmodel_gamma_gprior( + prior_gamma_gaussian, use_spline, use_re, use_re_mid_point, mrdata +): + covmodel = CovModel( + "cov1", + use_spline=True, + prior_gamma_gaussian=prior_gamma_gaussian, + use_re=use_re, + use_re_mid_point=use_re_mid_point, + ) covmodel.attach_data(mrdata) assert covmodel.prior_gamma_gaussian.ndim == 2 assert np.all(covmodel.prior_gamma_gaussian[0] == 0.0) assert np.all(covmodel.prior_gamma_gaussian[1] == 1.0) -@pytest.mark.parametrize('prior_gamma_laplace', [np.array([0.0, 1.0])]) -@pytest.mark.parametrize('use_re', [True]) -@pytest.mark.parametrize('use_spline', [True, False]) -@pytest.mark.parametrize('use_re_mid_point', [True, False]) -def test_covmodel_gamma_lprior(prior_gamma_laplace, use_spline, use_re, use_re_mid_point, mrdata): - covmodel = CovModel('cov1', use_spline=use_spline, - prior_gamma_laplace=prior_gamma_laplace, - use_re=use_re, use_re_mid_point=use_re_mid_point) +@pytest.mark.parametrize("prior_gamma_laplace", [np.array([0.0, 1.0])]) +@pytest.mark.parametrize("use_re", [True]) +@pytest.mark.parametrize("use_spline", [True, False]) +@pytest.mark.parametrize("use_re_mid_point", [True, False]) +def 
test_covmodel_gamma_lprior( + prior_gamma_laplace, use_spline, use_re, use_re_mid_point, mrdata +): + covmodel = CovModel( + "cov1", + use_spline=use_spline, + prior_gamma_laplace=prior_gamma_laplace, + use_re=use_re, + use_re_mid_point=use_re_mid_point, + ) covmodel.attach_data(mrdata) assert covmodel.prior_gamma_laplace.ndim == 2 assert np.all(covmodel.prior_gamma_laplace[0] == 0.0) assert np.all(covmodel.prior_gamma_laplace[1] == 1.0) -@pytest.mark.parametrize('prior_gamma_uniform', [np.array([0.0, 1.0])]) -@pytest.mark.parametrize('use_re', [True]) -@pytest.mark.parametrize('use_spline', [True, False]) -@pytest.mark.parametrize('use_re_mid_point', [True, False]) -def test_covmodel_gamma_uprior(prior_gamma_uniform, use_spline, use_re, use_re_mid_point, mrdata): - covmodel = CovModel('cov1', use_spline=use_spline, - prior_gamma_uniform=prior_gamma_uniform, - use_re=use_re, use_re_mid_point=use_re_mid_point) +@pytest.mark.parametrize("prior_gamma_uniform", [np.array([0.0, 1.0])]) +@pytest.mark.parametrize("use_re", [True]) +@pytest.mark.parametrize("use_spline", [True, False]) +@pytest.mark.parametrize("use_re_mid_point", [True, False]) +def test_covmodel_gamma_uprior( + prior_gamma_uniform, use_spline, use_re, use_re_mid_point, mrdata +): + covmodel = CovModel( + "cov1", + use_spline=use_spline, + prior_gamma_uniform=prior_gamma_uniform, + use_re=use_re, + use_re_mid_point=use_re_mid_point, + ) covmodel.attach_data(mrdata) assert covmodel.prior_gamma_uniform.ndim == 2 assert np.all(covmodel.prior_gamma_uniform[0] == 0.0) assert np.all(covmodel.prior_gamma_uniform[1] == 1.0) -@pytest.mark.parametrize('use_spline', [True]) -@pytest.mark.parametrize('prior_spline_monotonicity', ['increasing', 'decreasing', None]) -@pytest.mark.parametrize('prior_spline_convexity', ['convex', 'concave', None]) -@pytest.mark.parametrize('prior_spline_num_constraint_points', [20, 40]) -@pytest.mark.parametrize('prior_spline_maxder_gaussian', [np.array([0.0, 1.0]), None]) 
-@pytest.mark.parametrize('prior_spline_maxder_uniform', [np.array([0.0, 1.0]), None]) -def test_covmodel_spline_prior(use_spline, - prior_spline_monotonicity, - prior_spline_convexity, - prior_spline_num_constraint_points, - prior_spline_maxder_gaussian, - prior_spline_maxder_uniform, - mrdata): - covmodel = CovModel('cov1', - use_spline=use_spline, - prior_spline_monotonicity=prior_spline_monotonicity, - prior_spline_convexity=prior_spline_convexity, - prior_spline_num_constraint_points=prior_spline_num_constraint_points, - prior_spline_maxder_gaussian=prior_spline_maxder_gaussian, - prior_spline_maxder_uniform=prior_spline_maxder_uniform) +@pytest.mark.parametrize("use_spline", [True]) +@pytest.mark.parametrize( + "prior_spline_monotonicity", ["increasing", "decreasing", None] +) +@pytest.mark.parametrize("prior_spline_convexity", ["convex", "concave", None]) +@pytest.mark.parametrize("prior_spline_num_constraint_points", [20, 40]) +@pytest.mark.parametrize( + "prior_spline_maxder_gaussian", [np.array([0.0, 1.0]), None] +) +@pytest.mark.parametrize( + "prior_spline_maxder_uniform", [np.array([0.0, 1.0]), None] +) +def test_covmodel_spline_prior( + use_spline, + prior_spline_monotonicity, + prior_spline_convexity, + prior_spline_num_constraint_points, + prior_spline_maxder_gaussian, + prior_spline_maxder_uniform, + mrdata, +): + covmodel = CovModel( + "cov1", + use_spline=use_spline, + prior_spline_monotonicity=prior_spline_monotonicity, + prior_spline_convexity=prior_spline_convexity, + prior_spline_num_constraint_points=prior_spline_num_constraint_points, + prior_spline_maxder_gaussian=prior_spline_maxder_gaussian, + prior_spline_maxder_uniform=prior_spline_maxder_uniform, + ) covmodel.attach_data(mrdata) - num_constraints = ((prior_spline_monotonicity is not None) + - (prior_spline_convexity is not None))*prior_spline_num_constraint_points - num_constraints += (prior_spline_maxder_uniform is not None)*covmodel.spline.num_intervals - num_regularizers = 
(prior_spline_maxder_gaussian is not None)*covmodel.spline.num_intervals + num_constraints = ( + (prior_spline_monotonicity is not None) + + (prior_spline_convexity is not None) + ) * prior_spline_num_constraint_points + num_constraints += ( + prior_spline_maxder_uniform is not None + ) * covmodel.spline.num_intervals + num_regularizers = ( + prior_spline_maxder_gaussian is not None + ) * covmodel.spline.num_intervals assert covmodel.num_constraints == num_constraints assert covmodel.num_regularizations == num_regularizers def test_use_spline_intercept(mrdata): - linear_cov_model = LinearCovModel('cov1', - use_spline=True, - use_spline_intercept=True, - use_re=True, - use_re_mid_point=True, - prior_spline_monotonicity='increasing') + linear_cov_model = LinearCovModel( + "cov1", + use_spline=True, + use_spline_intercept=True, + use_re=True, + use_re_mid_point=True, + prior_spline_monotonicity="increasing", + ) linear_cov_model.attach_data(mrdata) - assert linear_cov_model.num_x_vars == linear_cov_model.spline.num_spline_bases + assert ( + linear_cov_model.num_x_vars == linear_cov_model.spline.num_spline_bases + ) assert linear_cov_model.num_z_vars == 2 alt_mat, ref_mat = linear_cov_model.create_design_mat(mrdata) c_mat, c_val = linear_cov_model.create_constraint_mat() assert alt_mat.shape == (mrdata.num_obs, linear_cov_model.num_x_vars) - assert c_mat.shape == (linear_cov_model.prior_spline_num_constraint_points, linear_cov_model.num_x_vars) + assert c_mat.shape == ( + linear_cov_model.prior_spline_num_constraint_points, + linear_cov_model.num_x_vars, + ) # with pytest.raises(ValueError): # LogCovModel('cov1', use_spline=True, use_spline_intercept=True, use_re=True, diff --git a/tests/test_data.py b/tests/test_data.py index 04e104e..79c9d2c 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- """ - test_data - ~~~~~~~~~ - Test `data` module for `mrtool` package. 
+test_data +~~~~~~~~~ +Test `data` module for `mrtool` package. """ + import numpy as np import pandas as pd import xarray as xr @@ -14,110 +15,108 @@ @pytest.fixture() def df(): num_obs = 5 - df = pd.DataFrame({ - 'obs': np.random.randn(num_obs), - 'obs_se': np.random.rand(num_obs) + 0.01, - 'cov0': np.random.randn(num_obs), - 'cov1': np.random.randn(num_obs), - 'cov2': np.random.randn(num_obs), - }) + df = pd.DataFrame( + { + "obs": np.random.randn(num_obs), + "obs_se": np.random.rand(num_obs) + 0.01, + "cov0": np.random.randn(num_obs), + "cov1": np.random.randn(num_obs), + "cov2": np.random.randn(num_obs), + } + ) return df + @pytest.fixture() def xarray(): - example_dataset = xr.Dataset({ - "y": - xr.DataArray( + example_dataset = xr.Dataset( + { + "y": xr.DataArray( np.random.random([2, 2]), dims=["age_group_id", "location_id"], name="random_met_need", - coords={"age_group_id": [2, 3], - "location_id": [6, 102]}), - "y_se": - xr.DataArray( + coords={"age_group_id": [2, 3], "location_id": [6, 102]}, + ), + "y_se": xr.DataArray( np.ones([2, 2]), dims=["age_group_id", "location_id"], name="random_met_need", - coords={"age_group_id": [2, 3], - "location_id": [6, 102]}), - "sdi": - xr.DataArray( - np.ones([2, 2])*5, + coords={"age_group_id": [2, 3], "location_id": [6, 102]}, + ), + "sdi": xr.DataArray( + np.ones([2, 2]) * 5, dims=["age_group_id", "location_id"], name="random_education", - coords={"age_group_id": [2, 3], - "location_id": [6, 102]}), - "sdi_se": - xr.DataArray( - np.ones([2, 2])*0, + coords={"age_group_id": [2, 3], "location_id": [6, 102]}, + ), + "sdi_se": xr.DataArray( + np.ones([2, 2]) * 0, dims=["age_group_id", "location_id"], name="random_education", - coords={"age_group_id": [2, 3], - "location_id": [6, 102]}), - }) + coords={"age_group_id": [2, 3], "location_id": [6, 102]}, + ), + } + ) return example_dataset @pytest.fixture def data(df): - df['study_id'] = np.array([0, 0, 1, 1, 2]) + df["study_id"] = np.array([0, 0, 1, 1, 2]) d = MRData() 
d.load_df( df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=[f'cov{i}' for i in range(3)], - col_study_id='study_id' + col_obs="obs", + col_obs_se="obs_se", + col_covs=[f"cov{i}" for i in range(3)], + col_study_id="study_id", ) return d -@pytest.mark.parametrize('obs', ['obs', None]) -@pytest.mark.parametrize('obs_se', ['obs_se', None]) +@pytest.mark.parametrize("obs", ["obs", None]) +@pytest.mark.parametrize("obs_se", ["obs_se", None]) def test_obs(df, obs, obs_se): d = MRData() - d.load_df(df, - col_obs=obs, - col_obs_se=obs_se, - col_covs=['cov0', 'cov1', 'cov2']) + d.load_df( + df, col_obs=obs, col_obs_se=obs_se, col_covs=["cov0", "cov1", "cov2"] + ) assert d.obs.size == df.shape[0] assert d.obs_se.size == df.shape[0] if obs is None: assert all(np.isnan(d.obs)) -@pytest.mark.parametrize('covs', [None, - ['cov0', 'cov1', 'cov2']]) +@pytest.mark.parametrize("covs", [None, ["cov0", "cov1", "cov2"]]) def test_covs(df, covs): d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=covs) + d.load_df(df, col_obs="obs", col_obs_se="obs_se", col_covs=covs) num_covs = 0 if covs is None else len(covs) num_covs += 1 assert d.num_covs == num_covs -@pytest.mark.parametrize('study_id', [None, np.array([0, 0, 1, 1, 2])]) +@pytest.mark.parametrize("study_id", [None, np.array([0, 0, 1, 1, 2])]) def test_study_id(df, study_id): if study_id is not None: - df['study_id'] = study_id - col_study_id = 'study_id' + df["study_id"] = study_id + col_study_id = "study_id" else: col_study_id = None d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2'], - col_study_id=col_study_id) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + col_study_id=col_study_id, + ) if col_study_id is None: - assert np.all(d.study_id == 'Unknown') + assert np.all(d.study_id == "Unknown") assert d.num_studies == 1 - assert d.studies[0] == 'Unknown' + assert d.studies[0] == "Unknown" 
else: assert np.allclose(d.study_id, np.array([0, 0, 1, 1, 2])) assert d.num_studies == 3 @@ -125,37 +124,41 @@ def test_study_id(df, study_id): assert np.allclose(d.study_sizes, np.array([2, 2, 1])) -@pytest.mark.parametrize('study_id', [None, - np.array([0, 0, 1, 1, 2]), - np.array([2, 0, 0, 1, 1])]) +@pytest.mark.parametrize( + "study_id", [None, np.array([0, 0, 1, 1, 2]), np.array([2, 0, 0, 1, 1])] +) def test_data_id(df, study_id): if study_id is not None: - df['study_id'] = study_id - col_study_id = 'study_id' + df["study_id"] = study_id + col_study_id = "study_id" else: col_study_id = None d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2'], - col_study_id=col_study_id) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + col_study_id=col_study_id, + ) d._sort_by_data_id() - assert np.allclose(d.obs, df['obs']) - assert np.allclose(d.obs_se, df['obs_se']) + assert np.allclose(d.obs, df["obs"]) + assert np.allclose(d.obs_se, df["obs_se"]) for i in range(3): - assert np.allclose(d.covs[f'cov{i}'], df[f'cov{i}']) + assert np.allclose(d.covs[f"cov{i}"], df[f"cov{i}"]) def test_is_empty(df): d = MRData() assert d.is_empty() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2']) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + ) assert not d.is_empty() d.reset() assert d.is_empty() @@ -169,78 +172,95 @@ def test_assert_not_empty(): def test_has_covs(df): d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2']) - assert d.has_covs(['cov0']) - assert d.has_covs(['cov0', 'cov1']) - assert not d.has_covs(['cov3']) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + ) + assert d.has_covs(["cov0"]) + assert d.has_covs(["cov0", "cov1"]) + assert not d.has_covs(["cov3"]) + def 
test_assert_has_covs(df): d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2']) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + ) with pytest.raises(ValueError): - d._assert_has_covs('cov3') + d._assert_has_covs("cov3") def test_get_covs(df): d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2']) - for cov_name in ['cov0', 'cov1', 'cov2']: - assert np.allclose(d.get_covs(cov_name), df[cov_name].to_numpy()[:, None]) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + ) + for cov_name in ["cov0", "cov1", "cov2"]: + assert np.allclose( + d.get_covs(cov_name), df[cov_name].to_numpy()[:, None] + ) - cov_mat = d.get_covs(['cov0', 'cov1', 'cov2']) - assert np.allclose(cov_mat, df[['cov0', 'cov1', 'cov2']].to_numpy()) + cov_mat = d.get_covs(["cov0", "cov1", "cov2"]) + assert np.allclose(cov_mat, df[["cov0", "cov1", "cov2"]].to_numpy()) -@pytest.mark.parametrize('covs', [None, 'cov0', ['cov0', 'cov1']]) +@pytest.mark.parametrize("covs", [None, "cov0", ["cov0", "cov1"]]) def test_normalize_covs(df, covs): d = MRData() - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=['cov0', 'cov1', 'cov2']) + d.load_df( + df, + col_obs="obs", + col_obs_se="obs_se", + col_covs=["cov0", "cov1", "cov2"], + ) d.normalize_covs(covs) assert d.is_cov_normalized(covs) -@pytest.mark.parametrize('covs', [['cov0', 'cov1']]) +@pytest.mark.parametrize("covs", [["cov0", "cov1"]]) def test_remove_nan_in_covs(df, covs): df.loc[:0, covs] = np.nan d = MRData() with pytest.warns(Warning): - d.load_df(df, - col_obs='obs', - col_obs_se='obs_se', - col_covs=covs) + d.load_df(df, col_obs="obs", col_obs_se="obs_se", col_covs=covs) assert d.num_obs == df.shape[0] - 1 def test_load_xr(xarray): d = MRData() - d.load_xr(xarray, - var_obs='y', - var_obs_se='y_se', - var_covs=['sdi'], - 
coord_study_id='location_id') + d.load_xr( + xarray, + var_obs="y", + var_obs_se="y_se", + var_covs=["sdi"], + coord_study_id="location_id", + ) - assert np.allclose(np.sort(d.obs), np.sort(xarray['y'].data.flatten())) - assert np.allclose(np.sort(d.obs_se), np.sort(xarray['y_se'].data.flatten())) - assert np.allclose(np.sort(d.covs['sdi']), np.sort(xarray['sdi'].data.flatten())) - assert np.allclose(np.sort(d.studies), np.sort(xarray.coords['location_id'])) + assert np.allclose(np.sort(d.obs), np.sort(xarray["y"].data.flatten())) + assert np.allclose( + np.sort(d.obs_se), np.sort(xarray["y_se"].data.flatten()) + ) + assert np.allclose( + np.sort(d.covs["sdi"]), np.sort(xarray["sdi"].data.flatten()) + ) + assert np.allclose( + np.sort(d.studies), np.sort(xarray.coords["location_id"]) + ) -@pytest.mark.parametrize('index', [np.array([True, True, False, False, False]), - np.array([0, 1])]) +@pytest.mark.parametrize( + "index", [np.array([True, True, False, False, False]), np.array([0, 1])] +) def test_get_data(index, data): sub_data = data._get_data(index) @@ -252,10 +272,10 @@ def test_get_data(index, data): assert np.allclose(data.covs[cov_name][index], sub_data.covs[cov_name]) -@pytest.mark.parametrize(('studies', 'result'), [([0, 1, 2], True), - ([3, 4, 0], False), - (0, True), - (3, False)]) +@pytest.mark.parametrize( + ("studies", "result"), + [([0, 1, 2], True), ([3, 4, 0], False), (0, True), (3, False)], +) def test_has_studies(studies, result, data): assert data.has_studies(studies) == result @@ -265,7 +285,7 @@ def test_assert_has_studies(data): data._assert_has_studies(3) -@pytest.mark.parametrize('studies', [0, [0], [0, 1], [0, 1, 2]]) +@pytest.mark.parametrize("studies", [0, [0], [0, 1], [0, 1, 2]]) def test_get_study_data(studies, data): sub_data = data.get_study_data(studies) diff --git a/tests/test_utils.py b/tests/test_utils.py index 997c840..68d4532 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,41 +1,51 @@ # -*- coding: utf-8 -*- 
""" - test_utils - ~~~~~~~~~~ - Test `utils` module of `sfma` package. +test_utils +~~~~~~~~~~ +Test `utils` module of `sfma` package. """ + import numpy as np import pandas as pd import pytest from mrtool import utils -@pytest.mark.parametrize('df', [pd.DataFrame({'alpha': np.ones(5), - 'beta': np.zeros(5)})]) -@pytest.mark.parametrize(('cols', 'col_shape'), - [('alpha', (5,)), - ('beta', (5,)), - (['alpha'], (5, 1)), - (['beta'], (5, 1)), - (['alpha', 'beta'], (5, 2)), - (None, (5, 0))]) +@pytest.mark.parametrize( + "df", [pd.DataFrame({"alpha": np.ones(5), "beta": np.zeros(5)})] +) +@pytest.mark.parametrize( + ("cols", "col_shape"), + [ + ("alpha", (5,)), + ("beta", (5,)), + (["alpha"], (5, 1)), + (["beta"], (5, 1)), + (["alpha", "beta"], (5, 2)), + (None, (5, 0)), + ], +) def test_get_cols(df, cols, col_shape): col = utils.get_cols(df, cols) assert col.shape == col_shape -@pytest.mark.parametrize(('cols', 'ok'), - [('col0', True), - (['col0', 'col1'], True), - ([], True), - (None, True), - (1, False)]) +@pytest.mark.parametrize( + ("cols", "ok"), + [ + ("col0", True), + (["col0", "col1"], True), + ([], True), + (None, True), + (1, False), + ], +) def test_is_cols(cols, ok): assert ok == utils.is_cols(cols) -@pytest.mark.parametrize('cols', [None, 'col0', ['col0', 'col1']]) -@pytest.mark.parametrize('default', [None, 'col0', ['col0', 'col1']]) +@pytest.mark.parametrize("cols", [None, "col0", ["col0", "col1"]]) +@pytest.mark.parametrize("default", [None, "col0", ["col0", "col1"]]) def test_input_cols_default(cols, default): result_cols = utils.input_cols(cols, default=default) if cols is None: @@ -44,55 +54,65 @@ def test_input_cols_default(cols, default): assert result_cols == cols -@pytest.mark.parametrize('cols', [None, 'col0', ['col0', 'col1']]) -@pytest.mark.parametrize('full_cols', [None, ['col2']]) +@pytest.mark.parametrize("cols", [None, "col0", ["col0", "col1"]]) +@pytest.mark.parametrize("full_cols", [None, ["col2"]]) def test_input_cols_append_to(cols, 
full_cols): cols = utils.input_cols(cols, append_to=full_cols) if full_cols is not None and cols: - assert 'col0' in full_cols and 'col2' in full_cols + assert "col0" in full_cols and "col2" in full_cols if isinstance(cols, list): - assert 'col1' in full_cols + assert "col1" in full_cols def test_sizes_to_indices(sizes, indices): my_indices = utils.sizes_to_indices(sizes) - assert all([np.allclose(my_indices[i], indices[i]) - for i in range(len(sizes))]) + assert all( + [np.allclose(my_indices[i], indices[i]) for i in range(len(sizes))] + ) -@pytest.mark.parametrize('sizes', [np.array([1, 2, 3])]) -@pytest.mark.parametrize('indices', [[np.arange(0, 1), - np.arange(1, 3), - np.arange(3, 6)]]) +@pytest.mark.parametrize("sizes", [np.array([1, 2, 3])]) +@pytest.mark.parametrize( + "indices", [[np.arange(0, 1), np.arange(1, 3), np.arange(3, 6)]] +) def test_sizes_to_indices(sizes, indices): my_indices = utils.sizes_to_indices(sizes) - assert all([np.allclose(my_indices[i], indices[i]) - for i in range(len(sizes))]) - - -@pytest.mark.parametrize(('prior', 'result'), - [(np.array([0.0, 1.0]), True), - (np.array([[0.0]*2, [1.0]*2]), True), - (np.array([0.0, -1.0]), False), - (np.array([[0.0]*2, [-1.0]*2]), False), - (None, True), - ('gaussian_prior', False)]) + assert all( + [np.allclose(my_indices[i], indices[i]) for i in range(len(sizes))] + ) + + +@pytest.mark.parametrize( + ("prior", "result"), + [ + (np.array([0.0, 1.0]), True), + (np.array([[0.0] * 2, [1.0] * 2]), True), + (np.array([0.0, -1.0]), False), + (np.array([[0.0] * 2, [-1.0] * 2]), False), + (None, True), + ("gaussian_prior", False), + ], +) def test_is_gaussian_prior(prior, result): assert utils.is_gaussian_prior(prior) == result -@pytest.mark.parametrize(('prior', 'result'), - [(np.array([0.0, 1.0]), True), - (np.array([[0.0]*2, [1.0]*2]), True), - (np.array([0.0, -1.0]), False), - (np.array([[0.0]*2, [-1.0]*2]), False), - (None, True), - ('uniform_prior', False)]) +@pytest.mark.parametrize( + ("prior", 
"result"), + [ + (np.array([0.0, 1.0]), True), + (np.array([[0.0] * 2, [1.0] * 2]), True), + (np.array([0.0, -1.0]), False), + (np.array([[0.0] * 2, [-1.0] * 2]), False), + (None, True), + ("uniform_prior", False), + ], +) def test_is_uniform_prior(prior, result): assert utils.is_uniform_prior(prior) == result -@pytest.mark.parametrize('obj', [1, 1.0, 'a', True, [1], [1.0], ['a'], [True]]) +@pytest.mark.parametrize("obj", [1, 1.0, "a", True, [1], [1.0], ["a"], [True]]) def test_to_list(obj): obj_list = utils.to_list(obj) if isinstance(obj, list): From 71fd16958b74db4dc1464deb0f255097c8c9ed4d Mon Sep 17 00:00:00 2001 From: zhengp0 Date: Tue, 25 Jun 2024 08:50:32 -0700 Subject: [PATCH 2/5] pass all ruff checkes --- pyproject.toml | 2 +- src/mrtool/__init__.py | 15 +++++++++++++-- src/mrtool/core/plots.py | 7 ++++--- tests/test_utils.py | 8 +------- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c152806..d0f5c91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ license = { file = "LICENSE" } authors = [ { name = "IHME Math Sciences", email = "ihme.math.sciences@gmail.com" }, ] -dependencies = ["numpy", "pandas", "scipy", "xspline<0.1.0", "xarray", "limetr<0.1.0"] +dependencies = ["numpy", "pandas", "scipy", "xspline<0.1.0", "xarray", "limetr<0.1.0", "matplotlib"] [project.optional-dependencies] dev = ["pytest", "pytest-mock"] diff --git a/src/mrtool/__init__.py b/src/mrtool/__init__.py index 79307ec..b70360a 100644 --- a/src/mrtool/__init__.py +++ b/src/mrtool/__init__.py @@ -6,8 +6,19 @@ `mrtool` package. 
""" -from .core.data import MRData +from .core import utils from .core.cov_model import CovModel, LinearCovModel, LogCovModel +from .core.data import MRData from .core.model import MRBRT, MRBeRT -from .core import utils from .cov_selection.covfinder import CovFinder + +__all__ = [ + "MRData", + "CovModel", + "LinearCovModel", + "LogCovModel", + "MRBRT", + "MRBeRT", + "utils", + "CovFinder", +] diff --git a/src/mrtool/core/plots.py b/src/mrtool/core/plots.py index 49bbc7c..16d48e2 100644 --- a/src/mrtool/core/plots.py +++ b/src/mrtool/core/plots.py @@ -3,6 +3,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd + from mrtool import MRData @@ -71,9 +72,9 @@ def plot_risk_function( pred_data = MRData() pred_data.load_df(pred_df, col_covs=col_covs) - y_draws = mrbrt.create_draws( - pred_data, beta_samples, gamma_samples, random_study=True - ) + # y_draws = mrbrt.create_draws( + # pred_data, beta_samples, gamma_samples, random_study=True + # ) y_draws_fe = mrbrt.create_draws( pred_data, beta_samples, gamma_samples, random_study=False ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 68d4532..2416189 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest + from mrtool import utils @@ -64,13 +65,6 @@ def test_input_cols_append_to(cols, full_cols): assert "col1" in full_cols -def test_sizes_to_indices(sizes, indices): - my_indices = utils.sizes_to_indices(sizes) - assert all( - [np.allclose(my_indices[i], indices[i]) for i in range(len(sizes))] - ) - - @pytest.mark.parametrize("sizes", [np.array([1, 2, 3])]) @pytest.mark.parametrize( "indices", [[np.arange(0, 1), np.arange(1, 3), np.arange(3, 6)]] From 07d9cc166c9ec549f4dfa0789bcd63f538811298 Mon Sep 17 00:00:00 2001 From: zhengp0 Date: Tue, 25 Jun 2024 08:56:39 -0700 Subject: [PATCH 3/5] switch the typing hinting to use built-in type --- docs/source/conf.py | 4 +-- src/mrtool/core/cov_model.py | 22 ++++++-------- 
src/mrtool/core/data.py | 44 ++++++++++++++------------- src/mrtool/core/model.py | 24 +++++++-------- src/mrtool/core/plots.py | 10 +++--- src/mrtool/core/utils.py | 15 ++++----- src/mrtool/cov_selection/covfinder.py | 40 ++++++++++++------------ 7 files changed, 80 insertions(+), 79 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index d1e8644..24b7df4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,8 +12,8 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # -from pathlib import Path import sys +from pathlib import Path import mrtool @@ -64,7 +64,7 @@ source_suffix = ".rst" master_doc = "index" -# List of patterns, relative to source directory, that match files and +# list of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] diff --git a/src/mrtool/core/cov_model.py b/src/mrtool/core/cov_model.py index 665250f..bb877a8 100644 --- a/src/mrtool/core/cov_model.py +++ b/src/mrtool/core/cov_model.py @@ -6,8 +6,6 @@ Covariates model for `mrtool`. """ -from typing import Tuple - import numpy as np import xspline @@ -94,34 +92,34 @@ def __init__( If use right linear tail. prior_spline_derval_gaussian (np.ndarray, optional): Gaussian prior for the derivative value of the spline. - prior_spline_derval_gaussian_domain (Tuple[float, float], optional): + prior_spline_derval_gaussian_domain (tuple[float, float], optional): Domain for the Gaussian prior for the derivative value of the spline. prior_spline_derval_uniform (np.ndarray, optional): Uniform prior for the derivative value of the spline. - prior_spline_derval_uniform_domain (Tuple[float, float], optional): + prior_spline_derval_uniform_domain (tuple[float, float], optional): Domain for the uniform prior for the derivative value of the spline. 
prior_spline_der2val_gaussian (np.ndarray, optional): Gaussian prior for the second order derivative value of the spline. - prior_spline_der2val_gaussian_domain (Tuple[float, float], optional): + prior_spline_der2val_gaussian_domain (tuple[float, float], optional): Domain for the Gaussian prior for the second order derivative value of the spline. prior_spline_der2val_uniform (np.ndarray, optional): Uniform prior for the second order derivative value of the spline. - prior_spline_der2val_uniform_domain (Tuple[float, float], optional): + prior_spline_der2val_uniform_domain (tuple[float, float], optional): Domain for the uniform prior for the second order derivative value of the spline. prior_spline_funval_gaussian (np.ndarray, optional): Gaussian prior for the function value of the spline. - prior_spline_funval_gaussian_domain (Tuple[float, float], optional): + prior_spline_funval_gaussian_domain (tuple[float, float], optional): Domain for the Gaussian prior for the function value of the spline. prior_spline_funval_uniform (np.ndarray, optional): Uniform prior for the function value of the spline. - prior_spline_funval_uniform_domain (Tuple[float, float], optional): + prior_spline_funval_uniform_domain (tuple[float, float], optional): Domain for the uniform prior for the function value of the spline. prior_spline_monotonicity (str | None, optional): Spline shape prior, `'increasing'` indicates spline is increasing, `'decreasing'` indicates spline is decreasing. - prior_spline_monotonicity_domain (Tuple[float, float], optional): + prior_spline_monotonicity_domain (tuple[float, float], optional): Domain where spline monotonicity prior applies. Default to `(0.0, 1.0)`. - prior_spline_convexity_domain (Tuple[float, float], optional): + prior_spline_convexity_domain (tuple[float, float], optional): Domain where spline convexity prior applies. Default to `(0.0, 1.0)`. 
prior_spline_convexity (str | None, optional): Spline shape prior, `'convex'` indicate if spline is convex and @@ -555,7 +553,7 @@ def create_z_mat(self, data): "Cannot use create_z_mat directly in CovModel class." ) - def create_constraint_mat(self) -> Tuple[np.ndarray, np.ndarray]: + def create_constraint_mat(self) -> tuple[np.ndarray, np.ndarray]: """Create constraint matrix. Returns: tuple{numpy.ndarray, numpy.ndarray}: @@ -670,7 +668,7 @@ def create_constraint_mat(self) -> Tuple[np.ndarray, np.ndarray]: return c_mat, c_val - def create_regularization_mat(self) -> Tuple[np.ndarray, np.ndarray]: + def create_regularization_mat(self) -> tuple[np.ndarray, np.ndarray]: """Create constraint matrix. Returns: tuple{numpy.ndarray, numpy.ndarray}: diff --git a/src/mrtool/core/data.py b/src/mrtool/core/data.py index f7c281a..57468be 100644 --- a/src/mrtool/core/data.py +++ b/src/mrtool/core/data.py @@ -6,12 +6,14 @@ `data` module for `mrtool` package. """ -from typing import Dict, List, Union, Any import warnings from dataclasses import dataclass, field +from typing import Any, Union + import numpy as np import pandas as pd -from .utils import empty_array, to_list, is_numeric_array, expand_array + +from .utils import empty_array, expand_array, is_numeric_array, to_list @dataclass @@ -20,10 +22,10 @@ class MRData: obs: np.ndarray = field(default_factory=empty_array) obs_se: np.ndarray = field(default_factory=empty_array) - covs: Dict[str, np.ndarray] = field(default_factory=dict) + covs: dict[str, np.ndarray] = field(default_factory=dict) study_id: np.ndarray = field(default_factory=empty_array) data_id: np.ndarray = field(default_factory=empty_array) - cov_scales: Dict[str, float] = field(init=False, default_factory=dict) + cov_scales: dict[str, float] = field(init=False, default_factory=dict) def __post_init__(self): self._check_attr_type() @@ -207,7 +209,7 @@ def _assert_not_empty(self): raise ValueError("MRData object is empty.") def is_cov_normalized( - self, covs: 
Union[List[str], str, None] = None + self, covs: Union[list[str], str, None] = None ) -> bool: """Return true when covariates are normalized.""" if covs is None: @@ -237,7 +239,7 @@ def load_df( data: pd.DataFrame, col_obs: Union[str, None] = None, col_obs_se: Union[str, None] = None, - col_covs: Union[List[str], None] = None, + col_covs: Union[list[str], None] = None, col_study_id: Union[str, None] = None, col_data_id: Union[str, None] = None, ): @@ -273,7 +275,7 @@ def load_xr( data, var_obs: Union[str, None] = None, var_obs_se: Union[str, None] = None, - var_covs: Union[List[str], None] = None, + var_covs: Union[list[str], None] = None, coord_study_id: Union[str, None] = None, ): """Load data from xarray.""" @@ -312,12 +314,12 @@ def to_df(self) -> pd.DataFrame: return df - def has_covs(self, covs: Union[List[str], str]) -> bool: + def has_covs(self, covs: Union[list[str], str]) -> bool: """If the data has the provided covariates. Args: - covs (Union[List[str], str]): - List of covariate names or one covariate name. + covs (Union[list[str], str]): + list of covariate names or one covariate name. Returns: bool: If has covariates return `True`. @@ -328,12 +330,12 @@ def has_covs(self, covs: Union[List[str], str]) -> bool: else: return all([cov in self.covs for cov in covs]) - def has_studies(self, studies: Union[List[Any], Any]) -> bool: + def has_studies(self, studies: Union[list[Any], Any]) -> bool: """If the data has provided study_id Args: - studies Union[List[Any], Any]: - List of studies or one study. + studies Union[list[Any], Any]: + list of studies or one study. Returns: bool: If has studies return `True`. 
@@ -344,7 +346,7 @@ def has_studies(self, studies: Union[List[Any], Any]) -> bool: else: return all([study in self.studies for study in studies]) - def _assert_has_covs(self, covs: Union[List[str], str]): + def _assert_has_covs(self, covs: Union[list[str], str]): """Assert has covariates otherwise raise ValueError.""" if not self.has_covs(covs): covs = to_list(covs) @@ -353,7 +355,7 @@ def _assert_has_covs(self, covs: Union[List[str], str]): f"MRData object do not contain covariates: {missing_covs}." ) - def _assert_has_studies(self, studies: Union[List[Any], Any]): + def _assert_has_studies(self, studies: Union[list[Any], Any]): """Assert has studies otherwise raise ValueError.""" if not self.has_studies(studies): studies = to_list(studies) @@ -364,12 +366,12 @@ def _assert_has_studies(self, studies: Union[List[Any], Any]): f"MRData object do not contain studies: {missing_studies}." ) - def get_covs(self, covs: Union[List[str], str]) -> np.ndarray: + def get_covs(self, covs: Union[list[str], str]) -> np.ndarray: """Get covariate matrix. Args: - covs (Union[List[str], str]): - List of covariate names or one covariate name. + covs (Union[list[str], str]): + list of covariate names or one covariate name. Returns: np.ndarray: Covariates matrix, in the column fashion. @@ -383,11 +385,11 @@ def get_covs(self, covs: Union[List[str], str]) -> np.ndarray: [self.covs[cov_names][:, None] for cov_names in covs] ) - def get_study_data(self, studies: Union[List[Any], Any]) -> "MRData": + def get_study_data(self, studies: Union[list[Any], Any]) -> "MRData": """Get study specific data. Args: - studies (Union[List[Any], Any]): List of studies or one study. + studies (Union[list[Any], Any]): list of studies or one study. Returns MRData: Data object contains the study specific data. 
@@ -397,7 +399,7 @@ def get_study_data(self, studies: Union[List[Any], Any]) -> "MRData": index = np.array([study in studies for study in self.study_id]) return self._get_data(index) - def normalize_covs(self, covs: Union[List[str], str, None] = None): + def normalize_covs(self, covs: Union[list[str], str, None] = None): """Normalize covariates by the largest absolute value for each covariate.""" if covs is None: covs = list(self.covs.keys()) diff --git a/src/mrtool/core/model.py b/src/mrtool/core/model.py index 29fdefb..c25eac5 100644 --- a/src/mrtool/core/model.py +++ b/src/mrtool/core/model.py @@ -7,7 +7,7 @@ """ from copy import deepcopy -from typing import List, Tuple, Union +from typing import Union import numpy as np import pandas as pd @@ -23,13 +23,13 @@ class MRBRT: """MR-BRT Object""" def __init__( - self, data: MRData, cov_models: List[CovModel], inlier_pct: float = 1.0 + self, data: MRData, cov_models: list[CovModel], inlier_pct: float = 1.0 ): """Constructor of MRBRT. Args: data (MRData): Data for meta-regression. - cov_models (List[CovModel]): A list of covariates models. + cov_models (list[CovModel]): A list of covariates models. inlier_pct (float, optional): A float number between 0 and 1 indicate the percentage of inliers. """ @@ -369,14 +369,14 @@ def predict( return prediction - def sample_soln(self, sample_size: int = 1) -> Tuple[NDArray, NDArray]: + def sample_soln(self, sample_size: int = 1) -> tuple[NDArray, NDArray]: """Sample solutions. Args: sample_size (int, optional): Number of samples. Return: - Tuple[NDArray, NDArray]: + tuple[NDArray, NDArray]: Return beta samples and gamma samples. 
""" if self.lt is None: @@ -437,7 +437,7 @@ def create_draws( return y_samples.T - def summary(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + def summary(self) -> tuple[pd.DataFrame, pd.DataFrame]: """Return the summary data frame.""" fe = pd.DataFrame(utils.ravel_dict(self.fe_soln), index=[0]) re_var = pd.DataFrame(utils.ravel_dict(self.re_var_soln), index=[0]) @@ -453,7 +453,7 @@ def __init__( data: MRData, ensemble_cov_model: CovModel, ensemble_knots: NDArray, - cov_models: Union[List[CovModel], None] = None, + cov_models: Union[list[CovModel], None] = None, inlier_pct: float = 1.0, ): """Constructor of `MRBeRT` @@ -462,7 +462,7 @@ def __init__( data (MRData): Data for meta-regression. ensemble_cov_model (CovModel): Covariates model which will be used with ensemble. - cov_models (Union[List[CovModel], None], optional): + cov_models (Union[list[CovModel], None], optional): Other covariate models, assume to be mutual exclusive with ensemble_cov_mdoel. inlier_pct (float): A float number between 0 and 1 indicate the percentage of inliers. 
""" @@ -547,7 +547,7 @@ def score_model( def sample_soln( self, sample_size: int = 1 - ) -> Tuple[List[NDArray], List[NDArray]]: + ) -> tuple[list[NDArray], list[NDArray]]: """Sample solution.""" sample_sizes = np.random.multinomial(sample_size, self.weights) @@ -601,8 +601,8 @@ def predict( def create_draws( self, data: MRData, - beta_samples: List[NDArray], - gamma_samples: List[NDArray], + beta_samples: list[NDArray], + gamma_samples: list[NDArray], random_study: bool = True, sort_by_data_id: bool = False, ) -> NDArray: @@ -640,7 +640,7 @@ def create_draws( return y_samples - def summary(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + def summary(self) -> tuple[pd.DataFrame, pd.DataFrame]: """Create summary data frame.""" summary_list = [sub_model.summary() for sub_model in self.sub_models] fe = pd.concat([summary_list[i][0] for i in range(self.num_sub_models)]) diff --git a/src/mrtool/core/plots.py b/src/mrtool/core/plots.py index 16d48e2..5a16dd8 100644 --- a/src/mrtool/core/plots.py +++ b/src/mrtool/core/plots.py @@ -29,14 +29,14 @@ def plot_risk_function( Beta samples generated using `sample_soln` function in MRBRT gamma_samples (np.ndarray): Gamma samples generated using `sample_soln` function in MRBRT - alt_cov_names (List[str], optional): + alt_cov_names (list[str], optional): Name of the alternative exposures, if `None` use `['b_0', 'b_1']`. Default to `None`. - ref_cov_names (List[str], optional): + ref_cov_names (list[str], optional): Name of the reference exposures, if `None` use `['a_0', 'a_1']`. Default to `None`. continuous_variables (list): - List of continuous covariate names. + list of continuous covariate names. plot_note (str): The notes intended to be written on the title. plots_dir (str): @@ -139,10 +139,10 @@ def plot_derivative_fit( MRBRT object. pair (str): risk_outcome pair. eg. 
'redmeat_colorectal' - alt_cov_names (List[str], optional): + alt_cov_names (list[str], optional): Name of the alternative exposures, if `None` use `['b_0', 'b_1']`. Default to `None`. - ref_cov_names (List[str], optional): + ref_cov_names (list[str], optional): Name of the reference exposures, if `None` use `['a_0', 'a_1']`. Default to `None`. plot_note (str, optional): diff --git a/src/mrtool/core/utils.py b/src/mrtool/core/utils.py index d55b5df..859610d 100644 --- a/src/mrtool/core/utils.py +++ b/src/mrtool/core/utils.py @@ -5,7 +5,8 @@ `utils` module of the `mrtool` package. """ -from typing import Union, List, Any, Tuple +from typing import Any, Union + import numpy as np import pandas as pd @@ -106,7 +107,7 @@ def sizes_to_indices(sizes): An array consist of non-negative number. Returns: list{range}: - List the indices. + list the indices. """ indices = [] a = 0 @@ -385,7 +386,7 @@ def _check_min_dist( def _check_feasibility( num_knots: int, knot_bounds: np.ndarray, min_dist: np.ndarray -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """Check knot feasibility and get left and right boundaries.""" if np.sum(min_dist) > knot_bounds[-1, 1] - knot_bounds[0, 0]: raise ValueError("min_dist cannot exceed knot_bounds") @@ -509,14 +510,14 @@ def empty_array(): return np.array(list()) -def to_list(obj: Any) -> List[Any]: +def to_list(obj: Any) -> list[Any]: """Convert objective to list of object. Args: obj (Any): Object need to be convert. Returns: - List[Any]: + list[Any]: If `obj` already is a list object, return `obj` itself, otherwise wrap `obj` with a list and return it. """ @@ -550,7 +551,7 @@ def is_numeric_array(array: np.ndarray) -> bool: def expand_array( - array: np.ndarray, shape: Tuple[int], value: Any, name: str + array: np.ndarray, shape: tuple[int], value: Any, name: str ) -> np.ndarray: """Expand array when it is empty. @@ -558,7 +559,7 @@ def expand_array( array (np.ndarray): Target array. 
If array is empty, fill in the ``value``. And When it is not empty assert the ``shape`` agrees and return the original array. - shape (Tuple[int]): The expected shape of the array. + shape (tuple[int]): The expected shape of the array. value (Any): The expected value in final array. name (str): Variable name of the array (for error message). diff --git a/src/mrtool/cov_selection/covfinder.py b/src/mrtool/cov_selection/covfinder.py index 184fb21..04423e4 100644 --- a/src/mrtool/cov_selection/covfinder.py +++ b/src/mrtool/cov_selection/covfinder.py @@ -6,7 +6,7 @@ import warnings from copy import deepcopy -from typing import Dict, List, Tuple, Union +from typing import Union import numpy as np @@ -22,34 +22,34 @@ class CovFinder: def __init__( self, data: MRData, - covs: List[str], - pre_selected_covs: Union[List[str], None] = None, + covs: list[str], + pre_selected_covs: Union[list[str], None] = None, normalized_covs: bool = True, num_samples: int = 1000, laplace_threshold: float = 1e-5, - power_range: Tuple[float, float] = (-8, 8), + power_range: tuple[float, float] = (-8, 8), power_step_size: float = 0.5, inlier_pct: float = 1.0, alpha: float = 0.05, - beta_gprior: Dict[str, np.ndarray] = None, + beta_gprior: dict[str, np.ndarray] = None, beta_gprior_std: float = 1.0, bias_zero: bool = False, - use_re: Union[Dict, None] = None, + use_re: Union[dict, None] = None, ): """Covariate Finder. Args: data (MRData): Data object used for variable selection. - covs (List[str]): Candidate covariates. + covs (list[str]): Candidate covariates. normalized_covs (bool): If true, will normalize the covariates. - pre_selected_covs (List[str] | None, optional): + pre_selected_covs (list[str] | None, optional): Pre-selected covaraites, will always be in the selected list. num_samples (int, optional): Number of samples used for judging if a variable is significance. 
laplace_threshold (float, optional): When coefficients from the Laplace regression is above this value, we consider it as the potential useful covariate. - power_range (Tuple[float, float], optional): + power_range (tuple[float, float], optional): Power range for the Laplace prior standard deviation. Laplace prior standard deviation will go from `10**power_range[0]` to `10**power_range[1]`. @@ -59,7 +59,7 @@ def __init__( beta_gprior_std (float, optional): Loose beta Gaussian prior standard deviation. Default to 1. bias_zero (bool, optional): If `True`, fit when specify the Gaussian prior it will be mean zero. Default to `False`. - use_re (Union[Dict, None], optional): + use_re (Union[dict, None], optional): A dictionary of use_re for each covariate. When `None` we have an uninformative prior for the random effects variance. Default to `None`. """ @@ -107,14 +107,14 @@ def __init__( def create_model( self, - covs: List[str], + covs: list[str], prior_type: str = "Laplace", laplace_std: float = None, ) -> MRBRT: """Create Gaussian or Laplace model. Args: - covs (List[str]): A list of covariates need to be included in the model. + covs (list[str]): A list of covariates need to be included in the model. prior_type (str): Indicate if use ``Gaussian`` or ``Laplace`` model. laplace_std (float): Standard deviation of the Laplace prior. Default to None. @@ -166,11 +166,11 @@ def create_model( ) return model - def fit_gaussian_model(self, covs: List[str]) -> MRBRT: + def fit_gaussian_model(self, covs: list[str]) -> MRBRT: """Fit Gaussian model. Args: - covs (List[str]): A list of covariates need to be included in the model. + covs (list[str]): A list of covariates need to be included in the model. Returns: MRBRT: the fitted model object. 
@@ -182,11 +182,11 @@ def fit_gaussian_model(self, covs: List[str]) -> MRBRT: gaussian_model.lt.gamma = empirical_gamma return gaussian_model - def fit_laplace_model(self, covs: List[str], laplace_std: float) -> MRBRT: + def fit_laplace_model(self, covs: list[str], laplace_std: float) -> MRBRT: """Fit Laplace model. Args: - covs (List[str]): A list of covariates need to be included in the model. + covs (list[str]): A list of covariates need to be included in the model. laplace_std (float): The Laplace prior std. Returns: @@ -202,7 +202,7 @@ def fit_laplace_model(self, covs: List[str], laplace_std: float) -> MRBRT: def summary_gaussian_model( self, gaussian_model: MRBRT - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Summary the gaussian model. Return the mean standard deviation and the significance indicator of beta. @@ -210,7 +210,7 @@ def summary_gaussian_model( gaussian_model (MRBRT): Gaussian model object. Returns: - Tuple[np.ndarray, np.ndarray, np.ndarray]: + tuple[np.ndarray, np.ndarray, np.ndarray]: Mean, standard deviation and indicator of the significance of beta solution. """ beta_samples = gaussian_model.lt.sample_beta(size=self.num_samples) @@ -278,12 +278,12 @@ def select_covs_by_laplace(self, laplace_std: float, verbose: bool = False): self.stop = True def update_beta_gprior( - self, covs: List[str], mean: np.ndarray, std: np.ndarray + self, covs: list[str], mean: np.ndarray, std: np.ndarray ): """Update the beta Gaussian prior. Args: - covs (List[str]): Name of the covariates. + covs (list[str]): Name of the covariates. mean (np.ndarray): Mean of the priors. std (np.ndarray): Standard deviation of the priors. 
""" From 8dccc930bc8c2649d730f5062ba0cfc4c44878e5 Mon Sep 17 00:00:00 2001 From: zhengp0 Date: Tue, 25 Jun 2024 09:02:43 -0700 Subject: [PATCH 4/5] remove all Union type hints --- src/mrtool/core/data.py | 46 +++++++++++++-------------- src/mrtool/core/model.py | 5 ++- src/mrtool/core/utils.py | 8 ++--- src/mrtool/cov_selection/covfinder.py | 7 ++-- 4 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/mrtool/core/data.py b/src/mrtool/core/data.py index 57468be..2f6ee65 100644 --- a/src/mrtool/core/data.py +++ b/src/mrtool/core/data.py @@ -8,7 +8,7 @@ import warnings from dataclasses import dataclass, field -from typing import Any, Union +from typing import Any import numpy as np import pandas as pd @@ -208,9 +208,7 @@ def _assert_not_empty(self): if self.is_empty(): raise ValueError("MRData object is empty.") - def is_cov_normalized( - self, covs: Union[list[str], str, None] = None - ) -> bool: + def is_cov_normalized(self, covs: list[str] | str | None = None) -> bool: """Return true when covariates are normalized.""" if covs is None: covs = list(self.covs.keys()) @@ -237,11 +235,11 @@ def reset(self): def load_df( self, data: pd.DataFrame, - col_obs: Union[str, None] = None, - col_obs_se: Union[str, None] = None, - col_covs: Union[list[str], None] = None, - col_study_id: Union[str, None] = None, - col_data_id: Union[str, None] = None, + col_obs: str | None = None, + col_obs_se: str | None = None, + col_covs: list[str] | None = None, + col_study_id: str | None = None, + col_data_id: str | None = None, ): """Load data from data frame.""" self.reset() @@ -273,10 +271,10 @@ def load_df( def load_xr( self, data, - var_obs: Union[str, None] = None, - var_obs_se: Union[str, None] = None, - var_covs: Union[list[str], None] = None, - coord_study_id: Union[str, None] = None, + var_obs: str | None = None, + var_obs_se: str | None = None, + var_covs: list[str] | None = None, + coord_study_id: str | None = None, ): """Load data from xarray.""" self.reset() @@ 
-314,11 +312,11 @@ def to_df(self) -> pd.DataFrame: return df - def has_covs(self, covs: Union[list[str], str]) -> bool: + def has_covs(self, covs: list[str] | str) -> bool: """If the data has the provided covariates. Args: - covs (Union[list[str], str]): + covs (list[str] | str): list of covariate names or one covariate name. Returns: @@ -330,11 +328,11 @@ def has_covs(self, covs: Union[list[str], str]) -> bool: else: return all([cov in self.covs for cov in covs]) - def has_studies(self, studies: Union[list[Any], Any]) -> bool: + def has_studies(self, studies: list[Any] | Any) -> bool: """If the data has provided study_id Args: - studies Union[list[Any], Any]: + studies list[Any] | Any: list of studies or one study. Returns: @@ -346,7 +344,7 @@ def has_studies(self, studies: Union[list[Any], Any]) -> bool: else: return all([study in self.studies for study in studies]) - def _assert_has_covs(self, covs: Union[list[str], str]): + def _assert_has_covs(self, covs: list[str] | str): """Assert has covariates otherwise raise ValueError.""" if not self.has_covs(covs): covs = to_list(covs) @@ -355,7 +353,7 @@ def _assert_has_covs(self, covs: Union[list[str], str]): f"MRData object do not contain covariates: {missing_covs}." ) - def _assert_has_studies(self, studies: Union[list[Any], Any]): + def _assert_has_studies(self, studies: list[Any] | Any): """Assert has studies otherwise raise ValueError.""" if not self.has_studies(studies): studies = to_list(studies) @@ -366,11 +364,11 @@ def _assert_has_studies(self, studies: Union[list[Any], Any]): f"MRData object do not contain studies: {missing_studies}." ) - def get_covs(self, covs: Union[list[str], str]) -> np.ndarray: + def get_covs(self, covs: list[str] | str) -> np.ndarray: """Get covariate matrix. Args: - covs (Union[list[str], str]): + covs (list[str] | str): list of covariate names or one covariate name. 
Returns: @@ -385,11 +383,11 @@ def get_covs(self, covs: Union[list[str], str]) -> np.ndarray: [self.covs[cov_names][:, None] for cov_names in covs] ) - def get_study_data(self, studies: Union[list[Any], Any]) -> "MRData": + def get_study_data(self, studies: list[Any] | Any) -> "MRData": """Get study specific data. Args: - studies (Union[list[Any], Any]): list of studies or one study. + studies (list[Any] | Any): list of studies or one study. Returns MRData: Data object contains the study specific data. @@ -399,7 +397,7 @@ def get_study_data(self, studies: Union[list[Any], Any]) -> "MRData": index = np.array([study in studies for study in self.study_id]) return self._get_data(index) - def normalize_covs(self, covs: Union[list[str], str, None] = None): + def normalize_covs(self, covs: list[str] | str | None = None): """Normalize covariates by the largest absolute value for each covariate.""" if covs is None: covs = list(self.covs.keys()) diff --git a/src/mrtool/core/model.py b/src/mrtool/core/model.py index c25eac5..4298b62 100644 --- a/src/mrtool/core/model.py +++ b/src/mrtool/core/model.py @@ -7,7 +7,6 @@ """ from copy import deepcopy -from typing import Union import numpy as np import pandas as pd @@ -453,7 +452,7 @@ def __init__( data: MRData, ensemble_cov_model: CovModel, ensemble_knots: NDArray, - cov_models: Union[list[CovModel], None] = None, + cov_models: list[CovModel] | None = None, inlier_pct: float = 1.0, ): """Constructor of `MRBeRT` @@ -462,7 +461,7 @@ def __init__( data (MRData): Data for meta-regression. ensemble_cov_model (CovModel): Covariates model which will be used with ensemble. - cov_models (Union[list[CovModel], None], optional): + cov_models (list[CovModel] | None, optional): Other covariate models, assume to be mutual exclusive with ensemble_cov_mdoel. inlier_pct (float): A float number between 0 and 1 indicate the percentage of inliers. 
""" diff --git a/src/mrtool/core/utils.py b/src/mrtool/core/utils.py index 859610d..d620f97 100644 --- a/src/mrtool/core/utils.py +++ b/src/mrtool/core/utils.py @@ -5,7 +5,7 @@ `utils` module of the `mrtool` package. """ -from typing import Any, Union +from typing import Any import numpy as np import pandas as pd @@ -294,7 +294,7 @@ def avg_integral(mat, spline=None, use_spline_intercept=False): def sample_knots( num_knots: int, knot_bounds: np.ndarray, - min_dist: Union[float, np.ndarray], + min_dist: float | np.ndarray, num_samples: int = 1, ) -> np.ndarray: """Sample knot vectors given a set of rules. @@ -367,9 +367,7 @@ def _check_knot_bounds(num_knots: int, knot_bounds: np.ndarray) -> np.ndarray: return knot_bounds -def _check_min_dist( - num_knots: int, min_dist: Union[float, np.ndarray] -) -> np.ndarray: +def _check_min_dist(num_knots: int, min_dist: float | np.ndarray) -> np.ndarray: """Check knot min_dist.""" if np.isscalar(min_dist): min_dist = np.tile(min_dist, num_knots + 1) diff --git a/src/mrtool/cov_selection/covfinder.py b/src/mrtool/cov_selection/covfinder.py index 04423e4..1125fc6 100644 --- a/src/mrtool/cov_selection/covfinder.py +++ b/src/mrtool/cov_selection/covfinder.py @@ -6,7 +6,6 @@ import warnings from copy import deepcopy -from typing import Union import numpy as np @@ -23,7 +22,7 @@ def __init__( self, data: MRData, covs: list[str], - pre_selected_covs: Union[list[str], None] = None, + pre_selected_covs: list[str] | None = None, normalized_covs: bool = True, num_samples: int = 1000, laplace_threshold: float = 1e-5, @@ -34,7 +33,7 @@ def __init__( beta_gprior: dict[str, np.ndarray] = None, beta_gprior_std: float = 1.0, bias_zero: bool = False, - use_re: Union[dict, None] = None, + use_re: dict | None = None, ): """Covariate Finder. @@ -59,7 +58,7 @@ def __init__( beta_gprior_std (float, optional): Loose beta Gaussian prior standard deviation. Default to 1. 
bias_zero (bool, optional): If `True`, fit when specify the Gaussian prior it will be mean zero. Default to `False`. - use_re (Union[dict, None], optional): + use_re (dict | None, optional): A dictionary of use_re for each covariate. When `None` we have an uninformative prior for the random effects variance. Default to `None`. """ From 9babcbac34489ce03ae41af4de18ec515d210ced Mon Sep 17 00:00:00 2001 From: zhengp0 Date: Tue, 25 Jun 2024 09:26:30 -0700 Subject: [PATCH 5/5] switch google style docstring to numpy style --- src/mrtool/core/cov_model.py | 266 +++++++++++++------------ src/mrtool/core/data.py | 80 +++++--- src/mrtool/core/model.py | 122 +++++++----- src/mrtool/core/plots.py | 88 +++++---- src/mrtool/core/utils.py | 268 +++++++++++++++----------- src/mrtool/cov_selection/covfinder.py | 132 ++++++++----- 6 files changed, 560 insertions(+), 396 deletions(-) diff --git a/src/mrtool/core/cov_model.py b/src/mrtool/core/cov_model.py index bb877a8..89100db 100644 --- a/src/mrtool/core/cov_model.py +++ b/src/mrtool/core/cov_model.py @@ -59,97 +59,99 @@ def __init__( ): """Constructor of the covariate model. - Args: - alt_cov (str | list{str}): - Main covariate name, when it is a list consists of two - covariates names, use the average integrand between defined by - the two covariates. - ref_cov (str | list{str} | None, optional): - Reference covariate name, will be interpreted differently in the - sub-classes. - name (str | None, optional): - Model name for easy access. - use_re (bool, optional): - If use the random effects. - use_re_mid_point (bool, optional): - If use the midpoint for the random effects. - use_spline(bool, optional): - If use splines. - use_spline_intercept(bool, optional): - If `True`, use full set of the spline bases, shouldn't include extra `intercept` in this case. 
- spline_knots_type (str, optional): - The method of how to place the knots, `'frequency'` place the - knots according to the data quantile and `'domain'` place the - knots according to the domain of the data. - spline_knots (np.ndarray, optional): - A numpy array between 0 and 1 contains the relative position of - the knots placement, with respect to either frequency or domain. - spline_degree (int, optional): - The degree of the spline. - spline_l_linear (bool, optional): - If use left linear tail. - spline_r_linear (bool, optional): - If use right linear tail. - prior_spline_derval_gaussian (np.ndarray, optional): - Gaussian prior for the derivative value of the spline. - prior_spline_derval_gaussian_domain (tuple[float, float], optional): - Domain for the Gaussian prior for the derivative value of the spline. - prior_spline_derval_uniform (np.ndarray, optional): - Uniform prior for the derivative value of the spline. - prior_spline_derval_uniform_domain (tuple[float, float], optional): - Domain for the uniform prior for the derivative value of the spline. - prior_spline_der2val_gaussian (np.ndarray, optional): - Gaussian prior for the second order derivative value of the spline. - prior_spline_der2val_gaussian_domain (tuple[float, float], optional): - Domain for the Gaussian prior for the second order derivative value of the spline. - prior_spline_der2val_uniform (np.ndarray, optional): - Uniform prior for the second order derivative value of the spline. - prior_spline_der2val_uniform_domain (tuple[float, float], optional): - Domain for the uniform prior for the second order derivative value of the spline. - prior_spline_funval_gaussian (np.ndarray, optional): - Gaussian prior for the function value of the spline. - prior_spline_funval_gaussian_domain (tuple[float, float], optional): - Domain for the Gaussian prior for the function value of the spline. - prior_spline_funval_uniform (np.ndarray, optional): - Uniform prior for the function value of the spline. 
- prior_spline_funval_uniform_domain (tuple[float, float], optional): - Domain for the uniform prior for the function value of the spline. - prior_spline_monotonicity (str | None, optional): - Spline shape prior, `'increasing'` indicates spline is - increasing, `'decreasing'` indicates spline is decreasing. - prior_spline_monotonicity_domain (tuple[float, float], optional): - Domain where spline monotonicity prior applies. Default to `(0.0, 1.0)`. - prior_spline_convexity_domain (tuple[float, float], optional): - Domain where spline convexity prior applies. Default to `(0.0, 1.0)`. - prior_spline_convexity (str | None, optional): - Spline shape prior, `'convex'` indicate if spline is convex and - `'concave'` indicate spline is concave. - prior_spline_num_constraint_points (int, optional): - Number of constraint points used in the the shape constraints - of the spline. - prior_spline_maxder_gaussian (numpy.ndarray, optional): - Gaussian prior on the highest derivative of the spline. - When it is a one dimensional array, the first element will be - the mean for all derivative and second element will be the sd. - When it is a two dimensional array, the first row will be the - mean and the second row will be the sd, the number of columns - should match the number of the intervals defined by the spline - knots. - prior_spline_maxder_uniform (numpy.ndarray, optional) - Uniform prior on the highest derivative of the spline. - prior_beta_gaussian (numpy.ndarray, optional): - Direct Gaussian prior for beta. It can be one dimensional or - two dimensional array like `prior_spline_maxder_gaussian`. - prior_beta_uniform (numpy.ndarray, optional): - Direct uniform prior for beta. - prior_beta_laplace (numpy.ndarray, optional): - Direct Laplace prior for beta. - prior_gamma_gaussian (numpy.ndarray, optional): - Direct Gaussian prior for gamma. - prior_gamma_uniform (numpy.ndarray, optional): - Direct uniform prior for gamma. 
- prior_gamma_laplace (numpy.ndarray, optional): - Direct Laplace prior for gamma. + Parameters + ---------- + alt_cov + Main covariate name, when it is a list consists of two + covariates names, use the average integrand between defined by + the two covariates. + ref_cov + Reference covariate name, will be interpreted differently in the + sub-classes. + name + Model name for easy access. + use_re + If use the random effects. + use_re_mid_point + If use the midpoint for the random effects. + use_spline + If use splines. + use_spline_intercept + If `True`, use full set of the spline bases, shouldn't include extra `intercept` in this case. + spline_knots_type + The method of how to place the knots, `'frequency'` place the + knots according to the data quantile and `'domain'` place the + knots according to the domain of the data. + spline_knots + A numpy array between 0 and 1 contains the relative position of + the knots placement, with respect to either frequency or domain. + spline_degree + The degree of the spline. + spline_l_linear + If use left linear tail. + spline_r_linear + If use right linear tail. + prior_spline_derval_gaussian + Gaussian prior for the derivative value of the spline. + prior_spline_derval_gaussian_domain + Domain for the Gaussian prior for the derivative value of the spline. + prior_spline_derval_uniform + Uniform prior for the derivative value of the spline. + prior_spline_derval_uniform_domain + Domain for the uniform prior for the derivative value of the spline. + prior_spline_der2val_gaussian + Gaussian prior for the second order derivative value of the spline. + prior_spline_der2val_gaussian_domain + Domain for the Gaussian prior for the second order derivative value of the spline. + prior_spline_der2val_uniform + Uniform prior for the second order derivative value of the spline. + prior_spline_der2val_uniform_domain + Domain for the uniform prior for the second order derivative value of the spline. 
+ prior_spline_funval_gaussian
+ Gaussian prior for the function value of the spline.
+ prior_spline_funval_gaussian_domain
+ Domain for the Gaussian prior for the function value of the spline.
+ prior_spline_funval_uniform
+ Uniform prior for the function value of the spline.
+ prior_spline_funval_uniform_domain
+ Domain for the uniform prior for the function value of the spline.
+ prior_spline_monotonicity
+ Spline shape prior, `'increasing'` indicates spline is
+ increasing, `'decreasing'` indicates spline is decreasing.
+ prior_spline_monotonicity_domain
+ Domain where spline monotonicity prior applies. Default to `(0.0, 1.0)`.
+ prior_spline_convexity_domain
+ Domain where spline convexity prior applies. Default to `(0.0, 1.0)`.
+ prior_spline_convexity
+ Spline shape prior, `'convex'` indicate if spline is convex and
+ `'concave'` indicate spline is concave.
+ prior_spline_num_constraint_points
+ Number of constraint points used in the shape constraints
+ of the spline.
+ prior_spline_maxder_gaussian
+ Gaussian prior on the highest derivative of the spline.
+ When it is a one dimensional array, the first element will be
+ the mean for all derivative and second element will be the sd.
+ When it is a two dimensional array, the first row will be the
+ mean and the second row will be the sd, the number of columns
+ should match the number of the intervals defined by the spline
+ knots.
+ prior_spline_maxder_uniform
+ Uniform prior on the highest derivative of the spline.
+ prior_beta_gaussian
+ Direct Gaussian prior for beta. It can be one dimensional or
+ two dimensional array like `prior_spline_maxder_gaussian`.
+ prior_beta_uniform
+ Direct uniform prior for beta.
+ prior_beta_laplace
+ Direct Laplace prior for beta.
+ prior_gamma_gaussian
+ Direct Gaussian prior for gamma.
+ prior_gamma_uniform
+ Direct uniform prior for gamma.
+ prior_gamma_laplace
+ Direct Laplace prior for gamma.
+ """ self.covs = [] self.alt_cov = utils.input_cols(alt_cov, append_to=self.covs) @@ -439,13 +441,18 @@ def create_spline( self, data: MRData, spline_knots: np.ndarray = None ) -> xspline.XSpline: """Create spline given current spline parameters. - Args: - data (mrtool.MRData): - The data frame used for storing the data - spline_knots (np.ndarray, optional): - Spline knots, if ``None`` determined by frequency or domain. - Returns: - xspline.XSpline: The spline object. + Parameters + ---------- + data + The data frame used for storing the data + spline_knots + Spline knots, if ``None`` determined by frequency or domain. + + Returns + ------- + xspline.XSpline + The spline object. + """ # extract covariate alt_cov = data.get_covs(self.alt_cov) @@ -518,14 +525,18 @@ def create_spline( return spline - def create_design_mat(self, data): + def create_design_mat(self, data) -> tuple[np.ndarray, np.ndarray]: """Create design matrix. - Args: - data (mrtool.MRData): - The data frame used for storing the data - Returns: - tuple{numpy.ndarray, numpy.ndarray}: - Return the design matrix for linear cov or spline. + Parameters + ---------- + data + The data frame used for storing the data + + Returns + ------- + tuple[numpy.ndarray, numpy.ndarray] + Return the design matrix for linear cov or spline. + """ alt_cov = data.get_covs(self.alt_cov) ref_cov = data.get_covs(self.ref_cov) @@ -812,13 +823,16 @@ def create_x_fun(self, data: MRData): def create_z_mat(self, data): """Create design matrix for the random effects. - Args: - data (mrtool.MRData): - The data frame used for storing the data + Parameters + ---------- + data + The data frame used for storing the data + + Returns + ------- + numpy.ndarray + Design matrix for random effects. - Returns: - numpy.ndarray: - Design matrix for random effects. 
""" if not self.use_re: return np.array([]).reshape(data.num_obs, 0) @@ -849,13 +863,16 @@ def __init__(self, *args, **kwargs): def create_x_fun(self, data): """Create design functions for the fixed effects. - Args: - data (mrtool.MRData): - The data frame used for storing the data + Parameters + ---------- + data + The data frame used for storing the data + + Returns + ------- + tuple[Callable, Callable] + Design functions for fixed effects. - Returns: - tuple{function, function}: - Design functions for fixed effects. """ alt_mat, ref_mat = self.create_design_mat(data) add_one = not (self.use_spline and self.use_spline_intercept) @@ -864,13 +881,16 @@ def create_x_fun(self, data): def create_z_mat(self, data): """Create design matrix for the random effects. - Args: - data (mrtool.MRData): - The data frame used for storing the data + Parameters + ---------- + data + The data frame used for storing the data + + Returns + ------- + numpy.ndarray + Design matrix for random effects. - Returns: - numpy.ndarray: - Design matrix for random effects. """ if not self.use_re: return np.array([]).reshape(data.num_obs, 0) diff --git a/src/mrtool/core/data.py b/src/mrtool/core/data.py index 2f6ee65..9fbbf7c 100644 --- a/src/mrtool/core/data.py +++ b/src/mrtool/core/data.py @@ -124,8 +124,11 @@ def _get_study_structure(self): def _sort_data(self, index: np.ndarray): """Sort the object. - Args: - index (np.ndarray): Sorting index. + Parameters + ---------- + index + Sorting index. + """ index = np.array(index) assert ( @@ -166,8 +169,11 @@ def _remove_nan_in_covs(self): def _remove_data(self, index: np.ndarray): """Remove the data point by index. - Args: - index (np.ndarray): Bool array, when ``True`` delete corresponding data. + Parameters + ---------- + index + Bool array, when ``True`` delete corresponding data. 
+ """ assert len(index) == self.num_obs assert all([isinstance(i, (bool, np.bool_)) for i in index]) @@ -183,11 +189,16 @@ def _remove_data(self, index: np.ndarray): def _get_data(self, index: np.ndarray) -> "MRData": """Get the data point by index. - Args: - index (np.ndarray): Indices of the data we want to get. + Parameters + ---------- + index + Indices of the data we want to get. + + Returns + ------- + MRData + data object contains the data from indices. - Returns: - MRData: data object contains the data from indices. """ obs = self.obs[index].copy() obs_se = self.obs_se[index].copy() @@ -315,12 +326,16 @@ def to_df(self) -> pd.DataFrame: def has_covs(self, covs: list[str] | str) -> bool: """If the data has the provided covariates. - Args: - covs (list[str] | str): - list of covariate names or one covariate name. + Parameters + ---------- + covs + list of covariate names or one covariate name. + + Returns + ------- + bool + If has covariates return `True`. - Returns: - bool: If has covariates return `True`. """ covs = to_list(covs) if len(covs) == 0: @@ -331,12 +346,16 @@ def has_covs(self, covs: list[str] | str) -> bool: def has_studies(self, studies: list[Any] | Any) -> bool: """If the data has provided study_id - Args: - studies list[Any] | Any: - list of studies or one study. + Parameters + ---------- + studies + list of studies or one study. + + Returns + ------- + bool + If has studies return `True`. - Returns: - bool: If has studies return `True`. """ studies = to_list(studies) if len(studies) == 0: @@ -367,12 +386,16 @@ def _assert_has_studies(self, studies: list[Any] | Any): def get_covs(self, covs: list[str] | str) -> np.ndarray: """Get covariate matrix. - Args: - covs (list[str] | str): - list of covariate names or one covariate name. + Parameters + ---------- + covs + list of covariate names or one covariate name. + + Returns + ------- + np.ndarray + Covariates matrix, in the column fashion. 
- Returns: - np.ndarray: Covariates matrix, in the column fashion. """ covs = to_list(covs) self._assert_has_covs(covs) @@ -386,11 +409,16 @@ def get_covs(self, covs: list[str] | str) -> np.ndarray: def get_study_data(self, studies: list[Any] | Any) -> "MRData": """Get study specific data. - Args: - studies (list[Any] | Any): list of studies or one study. + Parameters + ---------- + studies + list of studies or one study. Returns - MRData: Data object contains the study specific data. + ------- + MRData + Data object contains the study specific data. + """ self._assert_has_studies(studies) studies = to_list(studies) diff --git a/src/mrtool/core/model.py b/src/mrtool/core/model.py index 4298b62..4cb0b5c 100644 --- a/src/mrtool/core/model.py +++ b/src/mrtool/core/model.py @@ -26,11 +26,15 @@ def __init__( ): """Constructor of MRBRT. - Args: - data (MRData): Data for meta-regression. - cov_models (list[CovModel]): A list of covariates models. - inlier_pct (float, optional): - A float number between 0 and 1 indicate the percentage of inliers. + Parameters + ---------- + data + Data for meta-regression. + cov_models + A list of covariates models. + inlier_pct + A float number between 0 and 1 indicate the percentage of inliers. + """ self.data = data self.cov_models = cov_models @@ -223,8 +227,10 @@ def create_lprior(self): def fit_model(self, **fit_options): """Fitting the model through limetr. - Args: - fit_options: please check fit arguments in limetr. + Parameters + ---------- + fit_options + please check fit arguments in limetr. """ if not all([cov_model.has_data() for cov_model in self.cov_models]): @@ -340,18 +346,23 @@ def predict( ) -> NDArray: """Create new prediction with existing solution. - Args: - data (MRData): MRData object contains the predict data. - predict_for_study (bool, optional): - If `True`, use the random effects information to prediction for specific - study. 
If the `study_id` in `data` do not contain in the fitting data, it - will assume the corresponding random effects equal to 0. - sort_by_data_id (bool, optional): - If `True`, will sort the final prediction as the order of the original - data frame that used to create the `data`. Default to False. - - Returns: - NDArray: Predicted outcome array. + Parameters + ---------- + data + MRData object contains the predict data. + predict_for_study + If `True`, use the random effects information to prediction for specific + study. If the `study_id` in `data` do not contain in the fitting data, it + will assume the corresponding random effects equal to 0. + sort_by_data_id + If `True`, will sort the final prediction as the order of the original + data frame that used to create the `data`. Default to False. + + Returns + ------- + NDArray + Predicted outcome array. + """ assert data.has_covs( self.cov_names @@ -371,12 +382,16 @@ def predict( def sample_soln(self, sample_size: int = 1) -> tuple[NDArray, NDArray]: """Sample solutions. - Args: - sample_size (int, optional): Number of samples. + Parameters + ---------- + sample_size + Number of samples. + + Returns + ------- + tuple[NDArray, NDArray] + Return beta samples and gamma samples. - Return: - tuple[NDArray, NDArray]: - Return beta samples and gamma samples. """ if self.lt is None: raise ValueError("Please fit the model first.") @@ -398,18 +413,25 @@ def create_draws( ) -> NDArray: """Create draws for the given data set. - Args: - data (MRData): MRData object contains predict data. - beta_samples (NDArray): Samples of beta. - gamma_samples (NDArray): Samples of gamma. - random_study (bool, optional): - If `True` the draws will include uncertainty from study heterogeneity. - sort_by_data_id (bool, optional): - If `True`, will sort the final prediction as the order of the original - data frame that used to create the `data`. Default to False. - - Returns: - NDArray: Returns outcome sample matrix. 
+ Parameters + ---------- + data + MRData object contains predict data. + beta_samples + Samples of beta. + gamma_samples + Samples of gamma. + random_study + If `True` the draws will include uncertainty from study heterogeneity. + sort_by_data_id + If `True`, will sort the final prediction as the order of the original + data frame that used to create the `data`. Default to False. + + Returns + ------- + NDArray + Returns outcome sample matrix. + """ sample_size = beta_samples.shape[0] assert beta_samples.shape == (sample_size, self.num_x_vars) @@ -457,13 +479,17 @@ def __init__( ): """Constructor of `MRBeRT` - Args: - data (MRData): Data for meta-regression. - ensemble_cov_model (CovModel): - Covariates model which will be used with ensemble. - cov_models (list[CovModel] | None, optional): - Other covariate models, assume to be mutual exclusive with ensemble_cov_mdoel. - inlier_pct (float): A float number between 0 and 1 indicate the percentage of inliers. + Parameters + ---------- + data + Data for meta-regression. + ensemble_cov_model + Covariates model which will be used with ensemble. + cov_models + Other covariate models, assume to be mutual exclusive with ensemble_cov_mdoel. + inlier_pct + A float number between 0 and 1 indicate the percentage of inliers. + """ self.data = data self.cov_models = cov_models if cov_models is not None else [] @@ -576,10 +602,12 @@ def predict( ) -> NDArray: """Create new prediction with existing solution. - Args: - return_avg (bool): - When it is `True`, the function will return an average prediction based on the score, - and when it is `False` the function will return a list of predictions from all groups. + Parameters + ---------- + return_avg + When it is `True`, the function will return an average prediction based on the score, + and when it is `False` the function will return a list of predictions from all groups. 
+ """ prediction = np.vstack( [ diff --git a/src/mrtool/core/plots.py b/src/mrtool/core/plots.py index 5a16dd8..86bcd9a 100644 --- a/src/mrtool/core/plots.py +++ b/src/mrtool/core/plots.py @@ -20,30 +20,32 @@ def plot_risk_function( write_file=False, ): """Plot predicted relative risk. - Args: - mrbrt (mrtool.MRBRT): - MRBeRT object. - pair (str): - risk_outcome pair. eg. 'redmeat_colorectal' - beta_samples (np.ndarray): - Beta samples generated using `sample_soln` function in MRBRT - gamma_samples (np.ndarray): - Gamma samples generated using `sample_soln` function in MRBRT - alt_cov_names (list[str], optional): - Name of the alternative exposures, if `None` use `['b_0', 'b_1']`. - Default to `None`. - ref_cov_names (list[str], optional): - Name of the reference exposures, if `None` use `['a_0', 'a_1']`. - Default to `None`. - continuous_variables (list): - list of continuous covariate names. - plot_note (str): - The notes intended to be written on the title. - plots_dir (str): - Directory where to save the plot. - write_file (bool): - Specify `True` if the plot is expected to be saved on disk. - If True, `plots_dir` should be specified too. + Parameters + ---------- + mrbrt + MRBeRT object. + pair + risk_outcome pair. eg. 'redmeat_colorectal' + beta_samples + Beta samples generated using `sample_soln` function in MRBRT + gamma_samples + Gamma samples generated using `sample_soln` function in MRBRT + alt_cov_names + Name of the alternative exposures, if `None` use `['b_0', 'b_1']`. + Default to `None`. + ref_cov_names + Name of the reference exposures, if `None` use `['a_0', 'a_1']`. + Default to `None`. + continuous_variables + list of continuous covariate names. + plot_note + The notes intended to be written on the title. + plots_dir + Directory where to save the plot. + write_file + Specify `True` if the plot is expected to be saved on disk. + If True, `plots_dir` should be specified too. 
+ """ data_df = mrbrt.data.to_df() sub = mrbrt.sub_models[0] @@ -134,24 +136,26 @@ def plot_derivative_fit( write_file=False, ): """Plot fitted derivative. - Args: - mrbrt (mrtool.MRBRT): - MRBRT object. - pair (str): - risk_outcome pair. eg. 'redmeat_colorectal' - alt_cov_names (list[str], optional): - Name of the alternative exposures, if `None` use `['b_0', 'b_1']`. - Default to `None`. - ref_cov_names (list[str], optional): - Name of the reference exposures, if `None` use `['a_0', 'a_1']`. - Default to `None`. - plot_note (str, optional): - The notes intended to be written on the title. - plots_dir (str): - Directory where to save the plot. - write_file (bool): - Specify `True` if the plot is expected to be saved on disk. - If True, `plots_dir` should be specified too. + Parameters + ---------- + mrbrt + MRBRT object. + pair + risk_outcome pair. eg. 'redmeat_colorectal' + alt_cov_names + Name of the alternative exposures, if `None` use `['b_0', 'b_1']`. + Default to `None`. + ref_cov_names + Name of the reference exposures, if `None` use `['a_0', 'a_1']`. + Default to `None`. + plot_note + The notes intended to be written on the title. + plots_dir + Directory where to save the plot. + write_file + Specify `True` if the plot is expected to be saved on disk. + If True, `plots_dir` should be specified too. + """ dose_variable = alt_cov_names[1] data_df = mrbrt.data.to_df() diff --git a/src/mrtool/core/utils.py b/src/mrtool/core/utils.py index d620f97..f3b3782 100644 --- a/src/mrtool/core/utils.py +++ b/src/mrtool/core/utils.py @@ -13,14 +13,18 @@ def get_cols(df, cols): """Return the columns of the given data frame. - Args: - df (pandas.DataFrame): - Given data frame. - cols (str | list{str} | None): - Given column name(s), if is `None`, will return a empty data frame. - Returns: - pandas.DataFrame | pandas.Series: - The data frame contains the columns. + Parameters + ---------- + df + Given data frame. 
+ cols + Given column name(s), if is `None`, will return a empty data frame. + + Returns + ------- + pandas.DataFrame | pandas.Series: + The data frame contains the columns. + """ assert isinstance(df, pd.DataFrame) if isinstance(cols, list): @@ -36,12 +40,16 @@ def get_cols(df, cols): def is_cols(cols): """Check variable type fall into the column name category. - Args: - cols (str | list{str} | None): - Column names candidate. - Returns: - bool: - if `col` is either str, list{str} or None + Parameters + ---------- + cols + Column names candidate. + + Returns + ------- + bool + if `col` is either str, list{str} or None + """ ok = isinstance(cols, (str, list)) or cols is None if isinstance(cols, list): @@ -51,16 +59,20 @@ def is_cols(cols): def input_cols(cols, append_to=None, default=None): """Process the input column name. - Args: - cols (str | list{str} | None): - The input column name(s). - append_to (list{str} | None, optional): - A list keep track of all the column names. - default (str | list{str} | None, optional): - Default value when `cols` is `None`. - Returns: - str | list{str}: - The name of the column(s) + Parameters + ---------- + cols + The input column name(s). + append_to + A list keep track of all the column names. + default + Default value when `cols` is `None`. + + Returns + ------- + str | list{str} + The name of the column(s) + """ assert is_cols(cols) assert is_cols(append_to) @@ -82,13 +94,16 @@ def input_cols(cols, append_to=None, default=None): def combine_cols(cols): """Combine column names into one list of names. - Args: - cols (list{str | list{str}}): - A list of names of columns or list of column names. + Parameters + ---------- + cols + A list of names of columns or list of column names. + + Returns + ------- + list[str] + Combined names of columns. - Return: - list{str}: - Combined names of columns. 
""" combined_cols = [] for col in cols: @@ -102,12 +117,16 @@ def combine_cols(cols): def sizes_to_indices(sizes): """Converting sizes to corresponding indices. - Args: - sizes (numpy.dnarray): - An array consist of non-negative number. - Returns: - list{range}: - list the indices. + Parameters + ---------- + sizes + An array consist of non-negative number. + + Returns + ------- + list[np.ndarray] + list the indices. + """ indices = [] a = 0 @@ -123,18 +142,20 @@ def sizes_to_indices(sizes): def is_gaussian_prior(prior, size=None): """Check if variable satisfy Gaussian prior format - Args: - prior (numpy.ndarray): - Either one or two dimensional array, with first group refer to mean - and second group refer to standard deviation. + Parameters + ---------- + prior + Either one or two dimensional array, with first group refer to mean + and second group refer to standard deviation. - Keyword Args: - size (int | None, optional): - Size the variable, prior related to. + size + Size the variable, prior related to. + + Returns + ------- + bool + True if satisfy condition. - Returns: - bool: - True if satisfy condition. """ # check type ok = isinstance(prior, np.ndarray) or prior is None @@ -154,18 +175,20 @@ def is_gaussian_prior(prior, size=None): def is_uniform_prior(prior, size=None): """Check if variable satisfy uniform prior format - Args: - prior (numpy.ndarray): - Either one or two dimensional array, with first group refer to lower - bound and second group refer to upper bound. + Parameters + ---------- + prior + Either one or two dimensional array, with first group refer to lower + bound and second group refer to upper bound. + + size + Size the variable, prior related to. - Keyword Args: - size (int | None, optional): - Size the variable, prior related to. + Returns + ------- + bool + True if satisfy condition. - Returns: - bool: - True if satisfy condition. 
""" # check type ok = isinstance(prior, np.ndarray) or prior is None @@ -182,17 +205,20 @@ def is_uniform_prior(prior, size=None): def input_gaussian_prior(prior, size): """Process the input Gaussian prior - Args: - prior (numpy.ndarray): - Either one or two dimensional array, with first group refer to mean - and second group refer to standard deviation. - size (int, optional): - Size the variable, prior related to. - - Returns: - numpy.ndarray: - Prior after processing, with shape (2, size), with the first row - store the mean and second row store the standard deviation. + Parameters + ---------- + prior + Either one or two dimensional array, with first group refer to mean + and second group refer to standard deviation. + size + Size the variable, prior related to. + + Returns + ------- + numpy.ndarray + Prior after processing, with shape (2, size), with the first row + store the mean and second row store the standard deviation. + """ assert is_gaussian_prior(prior) if prior is None or prior.size == 0: @@ -210,17 +236,20 @@ def input_gaussian_prior(prior, size): def input_uniform_prior(prior, size): """Process the input Gaussian prior - Args: - prior (numpy.ndarray): - Either one or two dimensional array, with first group refer to mean - and second group refer to standard deviation. - size (int, optional): - Size the variable, prior related to. - - Returns: - numpy.ndarray: - Prior after processing, with shape (2, size), with the first row - store the mean and second row store the standard deviation. + Parameters + ---------- + prior + Either one or two dimensional array, with first group refer to mean + and second group refer to standard deviation. + size + Size the variable, prior related to. + + Returns + ------- + numpy.ndarray + Prior after processing, with shape (2, size), with the first row + store the mean and second row store the standard deviation. 
+ """ assert is_uniform_prior(prior) if prior is None or prior.size == 0: @@ -235,19 +264,22 @@ def input_uniform_prior(prior, size): def avg_integral(mat, spline=None, use_spline_intercept=False): """Compute average integral. - Args: - mat (numpy.ndarray): - Matrix that contains the starting and ending points of the integral - or a single column represents the mid-points. - spline (xspline.XSpline | None, optional): - Spline integrate over with, when `None` treat the function as - linear. - use_spline_intercept (bool, optional): - If `True` use all bases from spline, otherwise remove the first bases. - - Returns: - numpy.ndarray: - Design matrix when spline is not `None`, otherwise the mid-points. + Parameters + ---------- + mat + Matrix that contains the starting and ending points of the integral + or a single column represents the mid-points. + spline + Spline integrate over with, when `None` treat the function as + linear. + use_spline_intercept + If `True` use all bases from spline, otherwise remove the first bases. + + Returns + ------- + numpy.ndarray + Design matrix when spline is not `None`, otherwise the mid-points. + """ assert mat.ndim == 2 if mat.size == 0: @@ -511,13 +543,17 @@ def empty_array(): def to_list(obj: Any) -> list[Any]: """Convert objective to list of object. - Args: - obj (Any): Object need to be convert. + Parameters + ---------- + obj + Object need to be convert. + + Returns + ------- + list[Any] + If `obj` already is a list object, return `obj` itself, + otherwise wrap `obj` with a list and return it. - Returns: - list[Any]: - If `obj` already is a list object, return `obj` itself, - otherwise wrap `obj` with a list and return it. """ if isinstance(obj, list): return obj @@ -528,11 +564,16 @@ def to_list(obj: Any) -> list[Any]: def is_numeric_array(array: np.ndarray) -> bool: """Check if an array is numeric. - Args: - array (np.ndarray): Array need to be checked. + Parameters + ---------- + array + Array need to be checked. 
+ + Returns + ------- + bool + True if the array is numeric. - Returns: - bool: True if the array is numeric. """ numerical_dtype_kinds = { "b", # boolean @@ -553,16 +594,23 @@ def expand_array( ) -> np.ndarray: """Expand array when it is empty. - Args: - array (np.ndarray): - Target array. If array is empty, fill in the ``value``. And - When it is not empty assert the ``shape`` agrees and return the original array. - shape (tuple[int]): The expected shape of the array. - value (Any): The expected value in final array. - name (str): Variable name of the array (for error message). + Parameters + ---------- + array + Target array. If array is empty, fill in the ``value``. And + When it is not empty assert the ``shape`` agrees and return the original array. + shape + The expected shape of the array. + value + The expected value in final array. + name + Variable name of the array (for error message). + + Returns + ------- + np.ndarray + Expanded array. - Returns: - np.ndarray: Expanded array. """ array = np.array(array) if len(array) == 0: diff --git a/src/mrtool/cov_selection/covfinder.py b/src/mrtool/cov_selection/covfinder.py index 1125fc6..0dbf0e9 100644 --- a/src/mrtool/cov_selection/covfinder.py +++ b/src/mrtool/cov_selection/covfinder.py @@ -37,30 +37,39 @@ def __init__( ): """Covariate Finder. - Args: - data (MRData): Data object used for variable selection. - covs (list[str]): Candidate covariates. - normalized_covs (bool): If true, will normalize the covariates. - pre_selected_covs (list[str] | None, optional): - Pre-selected covaraites, will always be in the selected list. - num_samples (int, optional): - Number of samples used for judging if a variable is significance. - laplace_threshold (float, optional): - When coefficients from the Laplace regression is above this value, - we consider it as the potential useful covariate. - power_range (tuple[float, float], optional): - Power range for the Laplace prior standard deviation. 
- Laplace prior standard deviation will go from `10**power_range[0]` - to `10**power_range[1]`. - power_step_size (float, optional): Step size of the swiping across the power range. - inlier_pct (float, optional): Trimming option inlier percentage. Default to 1. - alpha (float, optional): Significance threshold. Default to 0.05. - beta_gprior_std (float, optional): Loose beta Gaussian prior standard deviation. Default to 1. - bias_zero (bool, optional): - If `True`, fit when specify the Gaussian prior it will be mean zero. Default to `False`. - use_re (dict | None, optional): - A dictionary of use_re for each covariate. When `None` we have an uninformative prior - for the random effects variance. Default to `None`. + Parameters + ---------- + data + Data object used for variable selection. + covs + Candidate covariates. + normalized_covs + If true, will normalize the covariates. + pre_selected_covs + Pre-selected covaraites, will always be in the selected list. + num_samples + Number of samples used for judging if a variable is significance. + laplace_threshold + When coefficients from the Laplace regression is above this value, + we consider it as the potential useful covariate. + power_range + Power range for the Laplace prior standard deviation. + Laplace prior standard deviation will go from `10**power_range[0]` + to `10**power_range[1]`. + power_step_size + Step size of the swiping across the power range. + inlier_pct + Trimming option inlier percentage. Default to 1. + alpha + Significance threshold. Default to 0.05. + beta_gprior_std + Loose beta Gaussian prior standard deviation. Default to 1. + bias_zero + If `True`, fit when specify the Gaussian prior it will be mean zero. Default to `False`. + use_re + A dictionary of use_re for each covariate. When `None` we have an uninformative prior + for the random effects variance. Default to `None`. + """ self.data = data @@ -112,13 +121,20 @@ def create_model( ) -> MRBRT: """Create Gaussian or Laplace model. 
- Args: - covs (list[str]): A list of covariates need to be included in the model. - prior_type (str): Indicate if use ``Gaussian`` or ``Laplace`` model. - laplace_std (float): Standard deviation of the Laplace prior. Default to None. + Parameters + ---------- + covs + A list of covariates need to be included in the model. + prior_type + Indicate if use ``Gaussian`` or ``Laplace`` model. + laplace_std + Standard deviation of the Laplace prior. Default to None. + + Returns + ------- + MRBRT + Created model object. - Return: - MRBRT: Created model object. """ assert prior_type in [ "Laplace", @@ -168,11 +184,16 @@ def create_model( def fit_gaussian_model(self, covs: list[str]) -> MRBRT: """Fit Gaussian model. - Args: - covs (list[str]): A list of covariates need to be included in the model. + Parameters + ---------- + covs + A list of covariates need to be included in the model. + + Returns + ------- + MRBRT + the fitted model object. - Returns: - MRBRT: the fitted model object. """ gaussian_model = self.create_model(covs, prior_type="Gaussian") gaussian_model.fit_model(x0=np.zeros(gaussian_model.num_vars)) @@ -184,12 +205,18 @@ def fit_gaussian_model(self, covs: list[str]) -> MRBRT: def fit_laplace_model(self, covs: list[str], laplace_std: float) -> MRBRT: """Fit Laplace model. - Args: - covs (list[str]): A list of covariates need to be included in the model. - laplace_std (float): The Laplace prior std. + Parameters + ---------- + covs + A list of covariates need to be included in the model. + laplace_std + The Laplace prior std. + + Returns + ------- + MRBRT + the fitted model object. - Returns: - MRBRT: the fitted model object. """ laplace_model = self.create_model( covs, prior_type="Laplace", laplace_std=laplace_std @@ -205,12 +232,16 @@ def summary_gaussian_model( """Summary the gaussian model. Return the mean standard deviation and the significance indicator of beta. - Args: - gaussian_model (MRBRT): Gaussian model object. 
+ Parameters + ---------- + gaussian_model + Gaussian model object. + + Returns + ------- + tuple[np.ndarray, np.ndarray, np.ndarray] + Mean, standard deviation and indicator of the significance of beta solution. - Returns: - tuple[np.ndarray, np.ndarray, np.ndarray]: - Mean, standard deviation and indicator of the significance of beta solution. """ beta_samples = gaussian_model.lt.sample_beta(size=self.num_samples) beta_mean = gaussian_model.beta_soln @@ -281,10 +312,15 @@ def update_beta_gprior( ): """Update the beta Gaussian prior. - Args: - covs (list[str]): Name of the covariates. - mean (np.ndarray): Mean of the priors. - std (np.ndarray): Standard deviation of the priors. + Parameters + ---------- + covs + Name of the covariates. + mean + Mean of the priors. + std + Standard deviation of the priors. + """ for i, cov in enumerate(covs): if cov not in self.all_covs: