feat: add more flexibility to visualize.pulls #342

Open · wants to merge 8 commits into master
6 changes: 4 additions & 2 deletions src/cabinetry/fit/__init__.py
@@ -86,7 +86,7 @@ def _fit_model_pyhf(

bestfit = pyhf.tensorlib.to_numpy(result[:, 0])
uncertainty = pyhf.tensorlib.to_numpy(result[:, 1])
- labels = model.config.par_names()
+ labels, types = model_utils._labels_modifiers(model)
corr_mat = pyhf.tensorlib.to_numpy(corr_mat)
best_twice_nll = float(best_twice_nll) # convert 0-dim np.ndarray to float

@@ -98,6 +98,7 @@ def _fit_model_pyhf(
bestfit,
uncertainty,
labels,
+ types,
corr_mat,
best_twice_nll,
minos_uncertainty=minos_results,
@@ -145,7 +146,7 @@ def _fit_model_custom(
fix_pars = fix_pars or model.config.suggested_fixed()
par_bounds = par_bounds or model.config.suggested_bounds()

- labels = model.config.par_names()
+ labels, types = model_utils._labels_modifiers(model)

# set initial step size to 0 for fixed parameters
# this will cause the associated parameter uncertainties to be 0 post-fit
@@ -190,6 +191,7 @@ def twice_nll_func(pars: np.ndarray) -> Any:
bestfit,
uncertainty,
labels,
+ types,
corr_mat,
best_twice_nll,
minos_uncertainty=minos_results,
4 changes: 3 additions & 1 deletion src/cabinetry/fit/results_containers.py
@@ -1,6 +1,6 @@
"""Provides containers for inference results."""

- from typing import Dict, List, NamedTuple, Tuple
+ from typing import Dict, List, NamedTuple, Optional, Tuple

import numpy as np

@@ -13,6 +13,7 @@ class FitResults(NamedTuple):
uncertainty (np.ndarray): uncertainties of best-fit parameter results, evaluated
with Hessian
labels (List[str]): parameter labels
+ types (List[Optional[str]]): parameter types
corr_mat (np.ndarray): parameter correlation matrix
best_twice_nll (float): -2 log(likelihood) at best-fit point
goodness_of_fit (float, optional): goodness-of-fit p-value, defaults to -1
@@ -23,6 +24,7 @@ class FitResults(NamedTuple):
bestfit: np.ndarray
uncertainty: np.ndarray
labels: List[str]
+ types: List[Optional[str]]
corr_mat: np.ndarray
best_twice_nll: float
goodness_of_fit: float = -1
15 changes: 14 additions & 1 deletion src/cabinetry/model_utils.py
@@ -2,6 +2,7 @@

import json
import logging
+ import re
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union

import awkward as ak
@@ -516,6 +517,17 @@ def _filter_channels(
return filtered_channels


+ def _labels_modifiers(
+     model: pyhf.pdf.Model,
+ ) -> Tuple[List[str], List[Optional[str]]]:
+     """Returns the model's parameter labels and each parameter's modifier type."""
+     labels = model.config.par_names()
+     _mod_dict = dict(model.config.modifiers)
+     _clean_labels = [re.sub(r"\[.*\]", "", label) for label in labels]
+     types = [_mod_dict.get(n) for n in _clean_labels]
Comment on lines +524 to +527
Member:
I am wondering whether we should extract and save the information about the constraint term (none, Gaussian, Poisson) instead of the modifier types. The problem with the types is that a parameter may control multiple modifiers (e.g. a normsys plus a shapesys). For the purpose of plotting pulls, the constraint term type is all that is needed to decide the handling.

We could get the constraint term types like this:

constraint_terms = []
for parameter in model.config.par_order:
    if not model.config.param_set(parameter).constrained:
        # normfactor / shapefactor
        constraint_terms += [None] * model.config.param_set(parameter).n_parameters
    else:
        # remaining modifiers with Gaussian or Poisson constraints
        constraint_terms += [
            model.config.param_set(parameter).pdf_type
        ] * model.config.param_set(parameter).n_parameters

Going one step further, we could also save the .width() information to evaluate constraints for Poisson-constrained parameters (since the pre-fit uncertainty for those varies per parameter).

For other use cases (like #332) it might be useful to also know all the modifier types.

While it is currently possible to determine the constraint term type from knowing the modifier, that will change when constraint terms become configurable in the future with pyhf. So perhaps it is best to store constraint term information directly, and optionally add another field in the future to also keep track of the modifier types?
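A minimal sketch combining both ideas, collecting the constraint term type and the pre-fit width per parameter in one pass (assuming the pyhf paramset API used elsewhere in cabinetry: constrained, n_parameters, pdf_type, width()):

constraint_terms = []
widths = []
for parameter in model.config.par_order:
    param_set = model.config.param_set(parameter)
    if not param_set.constrained:
        # normfactor / shapefactor: no constraint term, no pre-fit width
        constraint_terms += [None] * param_set.n_parameters
        widths += [None] * param_set.n_parameters
    else:
        # Gaussian or Poisson constraint; width() returns one value per parameter
        constraint_terms += [param_set.pdf_type] * param_set.n_parameters
        widths += param_set.width()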

Member:
I am remembering now where the idea of storing the modifier types came from: it allows excluding parameters by type in the plot, like excluding staterror. The constraint term type does not help there. It seems more likely that users would want to exclude by modifier type than by constraint term type, so perhaps it is best to stick with the implemented approach. The only thing that would need to be generalized is matching multiple modifier types per parameter.
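A sketch of that generalization, assuming types were changed to hold a list of modifier types per parameter (as constructed in the snippet in the next comment):

exclude_set.update(
    label
    for label, kinds in zip(fit_results.labels, fit_results.types)
    if any(kind in exclude_by_type for kind in kinds)
)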

Member:
Something like the following could help simplify things by using more of the pyhf API:

modifier_types = []
for parameter in model.config.par_order:
    modifier_types += [
        [
            mod_type
            for par_name, mod_type in model.config.modifiers
            if par_name == parameter
        ]
    ] * model.config.param_set(parameter).n_parameters

+     return labels, types


def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResults:
"""Matches results from a fit to a model by adding or removing parameters as needed.

@@ -543,7 +555,7 @@ def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResults:

bestfit = asimov_parameters(model) # Asimov parameter values for target model
uncertainty = prefit_uncertainties(model) # pre-fit uncertainties for target model
- labels = model.config.par_names()  # labels for target model
+ labels, types = _labels_modifiers(model)

# indices of parameters in current fit results, or None if they are missing
indices_for_corr: List[Optional[int]] = [None] * len(labels)
@@ -577,6 +589,7 @@ def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResults:
np.asarray(bestfit),
np.asarray(uncertainty),
labels,
+ types,
corr_mat,
fit_results.best_twice_nll,
fit_results.goodness_of_fit,
27 changes: 23 additions & 4 deletions src/cabinetry/visualize/__init__.py
@@ -1,5 +1,6 @@
"""High-level entry point for visualizing fit models and inference results."""

+ import fnmatch
import glob
import logging
import pathlib
@@ -441,6 +442,7 @@ def pulls(
*,
figure_folder: Union[str, pathlib.Path] = "figures",
exclude: Optional[Union[str, List[str], Tuple[str, ...]]] = None,
+ exclude_by_type: Optional[List[str]] = None,
close_figure: bool = True,
save_figure: bool = True,
) -> mpl.figure.Figure:
@@ -452,7 +454,11 @@
figure_folder (Union[str, pathlib.Path], optional): path to the folder to save
figures in, defaults to "figures"
exclude (Optional[Union[str, List[str], Tuple[str, ...]]], optional): parameter
- or parameters to exclude from plot, defaults to None (nothing excluded)
+ or parameters to exclude from plot, defaults to None (nothing excluded),
+ supports Unix shell-style wildcards
+ exclude_by_type (Optional[List[str]], optional): exclude parameters with the
+ given modifier types, defaults to None, which excludes ``staterror``
+ parameters (MC statistical uncertainties, which are centered at 1)
close_figure (bool, optional): whether to close figure, defaults to True
save_figure (bool, optional): whether to save figure, defaults to True

@@ -462,11 +468,14 @@
# path is None if figure should not be saved
figure_path = pathlib.Path(figure_folder) / "pulls.pdf" if save_figure else None
labels_np = np.asarray(fit_results.labels)
+ numeric = np.array(
+     [True if ty in ["normfactor"] else False for ty in fit_results.types]
+ )

if exclude is None:
exclude_set = set()
elif isinstance(exclude, str):
- exclude_set = {exclude}
+ exclude_set = set(fnmatch.filter(fit_results.labels, exclude))
else:
exclude_set = set(exclude)

@@ -479,19 +488,29 @@
]
)

- # exclude staterror parameters from pull plot (they are centered at 1)
- exclude_set.update([label for label in labels_np if label[0:10] == "staterror_"])
+ # exclude by type
+ if exclude_by_type is None:
+     exclude_by_type = ["staterror"]
+ exclude_set.update(
+     [
+         label
+         for label, kind in zip(labels_np, fit_results.types)
+         if kind in exclude_by_type
+     ]
+ )

# filter out user-specified parameters
mask = [True if label not in exclude_set else False for label in labels_np]
bestfit = fit_results.bestfit[mask]
uncertainty = fit_results.uncertainty[mask]
labels_np = labels_np[mask]
+ numeric = numeric[mask]

fig = plot_result.pulls(
bestfit,
uncertainty,
labels_np,
+ numeric=numeric,
figure_path=figure_path,
close_figure=close_figure,
)
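For reference, a usage sketch of the two new options; the FitResults values, labels, and types below are made up (mirroring the test fixtures), not taken from a real fit:

import numpy as np

from cabinetry import fit, visualize

fit_results = fit.FitResults(
    np.asarray([1.2, 0.1, -0.4]),
    np.asarray([0.3, 1.0, 0.9]),
    ["mu", "staterror_SR[0]", "ttbar_modeling"],
    ["normfactor", "staterror", "normsys"],
    np.identity(3),
    10.0,
)

visualize.pulls(
    fit_results,
    exclude="ttbar_*",  # Unix wildcards, resolved via fnmatch
    exclude_by_type=["staterror"],  # matches the None default
    save_figure=False,
)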
40 changes: 31 additions & 9 deletions src/cabinetry/visualize/plot_result.py
@@ -71,6 +71,7 @@ def pulls(
uncertainty: np.ndarray,
labels: Union[List[str], np.ndarray],
*,
+ numeric: Optional[Union[List[bool], np.ndarray]] = None,
figure_path: Optional[pathlib.Path] = None,
close_figure: bool = False,
) -> mpl.figure.Figure:
@@ -80,6 +81,8 @@
bestfit (np.ndarray): best-fit parameter results
uncertainty (np.ndarray): parameter uncertainties
labels (Union[List[str], np.ndarray]): parameter names
+ numeric (Optional[Union[List[bool], np.ndarray]], optional): mask of parameters
+ shown as numeric values instead of pulls, defaults to None (pulls for all)
figure_path (Optional[pathlib.Path], optional): path where figure should be
saved, or None to not save it, defaults to None
close_figure (bool, optional): whether to close each figure immediately after
@@ -90,23 +93,42 @@
matplotlib.figure.Figure: the pull figure
"""
num_pars = len(bestfit)
+ numeric = np.zeros(num_pars, dtype=bool) if numeric is None else np.asarray(numeric)
y_positions = np.arange(num_pars)[::-1]
- fig, ax = plt.subplots(figsize=(6, 1 + num_pars / 4), dpi=100)
- ax.errorbar(bestfit, y_positions, xerr=uncertainty, fmt="o", color="black")

- ax.fill_between([-2, 2], -0.5, len(bestfit) - 0.5, color="yellow")
- ax.fill_between([-1, 1], -0.5, len(bestfit) - 0.5, color="limegreen")
- ax.vlines(0, -0.5, len(bestfit) - 0.5, linestyles="dotted", color="black")
+ fig, ax = plt.subplots()
Member:
Let's keep the figsize=(6, 1 + num_pars / 4) scaling unless there's a strong reason not to, as that allows the spacing in the plot to be fairly consistent across a wide range of numbers of parameters.
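That is, restoring the removed scaling:

fig, ax = plt.subplots(figsize=(6, 1 + num_pars / 4), dpi=100)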

+ if num_pars > np.sum(numeric):  # actual pulls
+     ax.errorbar(
+         np.ma.masked_array(bestfit, mask=numeric),
+         np.ma.masked_array(y_positions, mask=numeric),
+         xerr=uncertainty,
+         fmt="o",
+         color="black",
+     )
+ mask = np.ones(2 * num_pars + 1).astype(bool)
+ if numeric is not None:
+     mask[1::2] = ~numeric[::-1]
+ x_dummy = np.linspace(-0.5, num_pars - 0.5, 2 * num_pars + 1)
+ ax.fill_betweenx(x_dummy, -2, 2, color="yellow", where=mask, linewidth=0)
+ ax.fill_betweenx(x_dummy, -1, 1, color="limegreen", where=mask, linewidth=0)
+ ax.plot(
+     np.zeros(len(mask)),
+     np.ma.masked_array(x_dummy, mask=~mask),
+     linestyle="dotted",
+     color="black",
+ )
+ if np.sum(numeric) > 0:
+     for i, (show, par, unc) in enumerate(
+         zip(numeric[::-1], bestfit[::-1], uncertainty[::-1])
+     ):
+         if show:
+             ax.text(0, i, rf"{par:.2f} $\pm$ {unc:.2f}", ha="center", va="center")

ax.set_xlim([-3, 3])
ax.set_xlabel(r"$\left(\hat{\theta} - \theta_0\right) / \Delta \theta$")
ax.set_ylim([-0.5, num_pars - 0.5])
ax.set_yticks(y_positions)
ax.set_yticklabels(labels)
- ax.xaxis.set_minor_locator(mpl.ticker.AutoMinorLocator())  # minor ticks
- ax.tick_params(axis="both", which="major", pad=8)
- ax.tick_params(direction="in", top=True, right=True, which="both")
- fig.set_tight_layout(True)
Comment on lines -106 to -109
Member:
I believe this removal is related to making figure customization easier via style sheets? If so, could we split that out into a separate PR? I'd like things to be more easily configurable, but I also do think the minor ticks help with legibility of constraints. Is there a way to move this into a default style sheet that users could override?
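One possible direction, sketched with standard matplotlib rcParams (a packaged cabinetry style sheet that users override is hypothetical at this point):

import matplotlib as mpl

# apply the tick styling globally instead of per-figure; these are all standard
# rcParams keys, so a user style sheet loaded afterwards can override them
mpl.rcParams.update(
    {
        "xtick.minor.visible": True,  # replaces the AutoMinorLocator call
        "xtick.direction": "in",
        "ytick.direction": "in",
        "xtick.top": True,
        "ytick.right": True,
        "xtick.major.pad": 8,
        "ytick.major.pad": 8,
    }
)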


utils._save_and_close(fig, figure_path, close_figure)
return fig
34 changes: 28 additions & 6 deletions tests/cli/test_cli.py
@@ -95,7 +95,12 @@ def test_workspace(mock_validate, mock_build, cli_helpers, tmp_path):
@mock.patch(
"cabinetry.fit.fit",
return_value=fit.FitResults(
- np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+ np.asarray([1.0]),
+ np.asarray([0.1]),
+ ["label"],
+ ["type"],
+ np.asarray([[1.0]]),
+ 1.0,
),
autospec=True,
)
@@ -109,8 +114,9 @@ def test_fit(mock_utils, mock_fit, mock_pulls, mock_corrmat, tmp_path):
bestfit = np.asarray([1.0])
uncertainty = np.asarray([0.1])
labels = ["label"]
+ types = ["type"]
corr_mat = np.asarray([[1.0]])
- fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, 1.0)
+ fit_results = fit.FitResults(bestfit, uncertainty, labels, types, corr_mat, 1.0)

workspace_path = str(tmp_path / "workspace.json")

@@ -204,7 +210,12 @@ def test_fit(mock_utils, mock_fit, mock_pulls, mock_corrmat, tmp_path):
@mock.patch(
"cabinetry.fit.fit",
return_value=fit.FitResults(
- np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+ np.asarray([1.0]),
+ np.asarray([0.1]),
+ ["label"],
+ ["type"],
+ np.asarray([[1.0]]),
+ 1.0,
),
autospec=True,
)
@@ -218,8 +229,9 @@ def test_ranking(mock_utils, mock_fit, mock_rank, mock_vis, tmp_path):
bestfit = np.asarray([1.0])
uncertainty = np.asarray([0.1])
labels = ["label"]
+ types = ["type"]
corr_mat = np.asarray([[1.0]])
- fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, 1.0)
+ fit_results = fit.FitResults(bestfit, uncertainty, labels, types, corr_mat, 1.0)

workspace_path = str(tmp_path / "workspace.json")

@@ -467,7 +479,12 @@ def test_significance(mock_utils, mock_sig, tmp_path):
@mock.patch(
"cabinetry.fit.fit",
return_value=fit.FitResults(
- np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+ np.asarray([1.0]),
+ np.asarray([0.1]),
+ ["label"],
+ ["type"],
+ np.asarray([[1.0]]),
+ 1.0,
),
autospec=True,
)
@@ -508,7 +525,12 @@ def test_data_mc(
config_path = str(tmp_path / "config.yml")
cli_helpers.write_config(config_path, config)
fit_results = fit.FitResults(
- np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+ np.asarray([1.0]),
+ np.asarray([0.1]),
+ ["label"],
+ ["type"],
+ np.asarray([[1.0]]),
+ 1.0,
)

result = runner.invoke(