scikit-hep · andrzejnovak · Jun 9, 2022 · Jun 9, 2022 · Jun 9, 2022 · Jun 9, 2022
diff --git a/src/cabinetry/fit/__init__.py b/src/cabinetry/fit/__init__.py
@@ -86,7 +86,7 @@ def _fit_model_pyhf(
 
     bestfit = pyhf.tensorlib.to_numpy(result[:, 0])
     uncertainty = pyhf.tensorlib.to_numpy(result[:, 1])
-    labels = model.config.par_names()
+    labels, types = model_utils._labels_modifiers(model)
     corr_mat = pyhf.tensorlib.to_numpy(corr_mat)
     best_twice_nll = float(best_twice_nll)  # convert 0-dim np.ndarray to float
 
@@ -98,6 +98,7 @@ def _fit_model_pyhf(
         bestfit,
         uncertainty,
         labels,
+        types,
         corr_mat,
         best_twice_nll,
         minos_uncertainty=minos_results,
@@ -145,7 +146,7 @@ def _fit_model_custom(
     fix_pars = fix_pars or model.config.suggested_fixed()
     par_bounds = par_bounds or model.config.suggested_bounds()
 
-    labels = model.config.par_names()
+    labels, types = model_utils._labels_modifiers(model)
 
     # set initial step size to 0 for fixed parameters
     # this will cause the associated parameter uncertainties to be 0 post-fit
@@ -190,6 +191,7 @@ def twice_nll_func(pars: np.ndarray) -> Any:
         bestfit,
         uncertainty,
         labels,
+        types,
         corr_mat,
         best_twice_nll,
         minos_uncertainty=minos_results,

diff --git a/src/cabinetry/fit/results_containers.py b/src/cabinetry/fit/results_containers.py
@@ -1,6 +1,6 @@
 """Provides containers for inference results."""
 
-from typing import Dict, List, NamedTuple, Tuple
+from typing import Dict, List, NamedTuple, Optional, Tuple
 
 import numpy as np
 
@@ -13,6 +13,7 @@ class FitResults(NamedTuple):
         uncertainty (np.ndarray): uncertainties of best-fit parameter results, evaluated
             with Hessian
         labels (List[str]): parameter labels
+        types (List[Optional[str]]): parameter types
         corr_mat (np.ndarray): parameter correlation matrix
         best_twice_nll (float): -2 log(likelihood) at best-fit point
         goodess_of_fit (float, optional): goodness-of-fit p-value, defaults to -1
@@ -23,6 +24,7 @@ class FitResults(NamedTuple):
     bestfit: np.ndarray
     uncertainty: np.ndarray
     labels: List[str]
+    types: List[Optional[str]]
     corr_mat: np.ndarray
     best_twice_nll: float
     goodness_of_fit: float = -1

diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py
@@ -2,6 +2,7 @@
 
 import json
 import logging
+import re
 from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
 
 import awkward as ak
@@ -516,6 +517,17 @@ def _filter_channels(
     return filtered_channels
 
 
+def _labels_modifiers(
+    model: pyhf.pdf.Model,
+) -> Tuple[List[str], List[Optional[str]]]:
+    """ """
+    labels = model.config.par_names()
+    _mod_dict = dict(model.config.modifiers)
+    _clean_labels = [re.sub(r"\[.*\]", "", label) for label in labels]
+    types = [_mod_dict[n] if n in _mod_dict else None for n in _clean_labels]
+    return labels, types
+
+
 def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResults:
     """Matches results from a fit to a model by adding or removing parameters as needed.
 
@@ -543,7 +555,7 @@ def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResu
 
     bestfit = asimov_parameters(model)  # Asimov parameter values for target model
     uncertainty = prefit_uncertainties(model)  # pre-fit uncertainties for target model
-    labels = model.config.par_names()  # labels for target model
+    labels, types = _labels_modifiers(model)
 
     # indices of parameters in current fit results, or None if they are missing
     indices_for_corr: List[Optional[int]] = [None] * len(labels)
@@ -577,6 +589,7 @@ def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResu
         np.asarray(bestfit),
         np.asarray(uncertainty),
         labels,
+        types,
         corr_mat,
         fit_results.best_twice_nll,
         fit_results.goodness_of_fit,

diff --git a/src/cabinetry/visualize/__init__.py b/src/cabinetry/visualize/__init__.py
@@ -1,5 +1,6 @@
 """High-level entry point for visualizing fit models and inference results."""
 
+import fnmatch
 import glob
 import logging
 import pathlib
@@ -441,6 +442,7 @@ def pulls(
     *,
     figure_folder: Union[str, pathlib.Path] = "figures",
     exclude: Optional[Union[str, List[str], Tuple[str, ...]]] = None,
+    exclude_by_type: Optional[List[str]] = None,
     close_figure: bool = True,
     save_figure: bool = True,
 ) -> mpl.figure.Figure:
@@ -452,7 +454,11 @@ def pulls(
         figure_folder (Union[str, pathlib.Path], optional): path to the folder to save
             figures in, defaults to "figures"
         exclude (Optional[Union[str, List[str], Tuple[str, ...]]], optional): parameter
-            or parameters to exclude from plot, defaults to None (nothing excluded)
+            or parameters to exclude from plot, defaults to None (nothing excluded),
+            compatible with unix wildcards
+        exclude_by_type (Optional[Union[str, List[str], Tuple[str, ...]]], optional):
+            exclude parameters of the given type, defaults ``['staterror']`` filtering
+            out mc_stat uncertainties which are centered on 1
         close_figure (bool, optional): whether to close figure, defaults to True
         save_figure (bool, optional): whether to save figure, defaults to True
 
@@ -462,11 +468,14 @@ def pulls(
     # path is None if figure should not be saved
     figure_path = pathlib.Path(figure_folder) / "pulls.pdf" if save_figure else None
     labels_np = np.asarray(fit_results.labels)
+    numeric = np.array(
+        [True if ty in ["normfactor"] else False for ty in fit_results.types]
+    )
 
     if exclude is None:
         exclude_set = set()
     elif isinstance(exclude, str):
-        exclude_set = {exclude}
+        exclude_set = set(fnmatch.filter(fit_results.labels, exclude))
     else:
         exclude_set = set(exclude)
 
@@ -479,19 +488,29 @@ def pulls(
         ]
     )
 
-    # exclude staterror parameters from pull plot (they are centered at 1)
-    exclude_set.update([label for label in labels_np if label[0:10] == "staterror_"])
+    # exclude by type
+    if exclude_by_type is None:
+        exclude_by_type = ["staterror"]
+    exclude_set.update(
+        [
+            label
+            for label, kind in zip(labels_np, fit_results.types)
+            if kind in exclude_by_type
+        ]
+    )
 
     # filter out user-specified parameters
     mask = [True if label not in exclude_set else False for label in labels_np]
     bestfit = fit_results.bestfit[mask]
     uncertainty = fit_results.uncertainty[mask]
     labels_np = labels_np[mask]
+    numeric = numeric[mask]
 
     fig = plot_result.pulls(
         bestfit,
         uncertainty,
         labels_np,
+        numeric=numeric,
         figure_path=figure_path,
         close_figure=close_figure,
     )

diff --git a/src/cabinetry/visualize/plot_result.py b/src/cabinetry/visualize/plot_result.py
@@ -71,6 +71,7 @@ def pulls(
     uncertainty: np.ndarray,
     labels: Union[List[str], np.ndarray],
     *,
+    numeric: Optional[Union[List[bool], np.ndarray]] = None,
     figure_path: Optional[pathlib.Path] = None,
     close_figure: bool = False,
 ) -> mpl.figure.Figure:
@@ -80,6 +81,8 @@ def pulls(
         bestfit (np.ndarray): best-fit parameter results
         uncertainty (np.ndarray): parameter uncertainties
         labels (Union[List[str], np.ndarray]): parameter names
+        numeric Optional[Union[List[bool], np.ndarray]]: which parameters are numeric,
+            and should only be shown directly
         figure_path (Optional[pathlib.Path], optional): path where figure should be
             saved, or None to not save it, defaults to None
         close_figure (bool, optional): whether to close each figure immediately after
@@ -90,23 +93,42 @@ def pulls(
         matplotlib.figure.Figure: the pull figure
     """
     num_pars = len(bestfit)
+    numeric = np.zeros(num_pars, dtype=bool) if numeric is None else np.asarray(numeric)
     y_positions = np.arange(num_pars)[::-1]
-    fig, ax = plt.subplots(figsize=(6, 1 + num_pars / 4), dpi=100)
-    ax.errorbar(bestfit, y_positions, xerr=uncertainty, fmt="o", color="black")
 
-    ax.fill_between([-2, 2], -0.5, len(bestfit) - 0.5, color="yellow")
-    ax.fill_between([-1, 1], -0.5, len(bestfit) - 0.5, color="limegreen")
-    ax.vlines(0, -0.5, len(bestfit) - 0.5, linestyles="dotted", color="black")
+    fig, ax = plt.subplots()
+    if num_pars > np.sum(numeric):  # Actual pulls
+        ax.errorbar(
+            np.ma.masked_array(bestfit, mask=numeric),
+            np.ma.masked_array(y_positions, mask=numeric),
+            xerr=uncertainty,
+            fmt="o",
+            color="black",
+        )
+        mask = np.ones(2 * num_pars + 1).astype(bool)
+        if numeric is not None:
+            mask[1::2] = ~numeric[::-1]
+        x_dummy = np.linspace(-0.5, num_pars - 0.5, 2 * num_pars + 1)
+        ax.fill_betweenx(x_dummy, -2, 2, color="yellow", where=mask, linewidth=0)
+        ax.fill_betweenx(x_dummy, -1, 1, color="limegreen", where=mask, linewidth=0)
+        ax.plot(
+            np.zeros(len(mask)),
+            np.ma.masked_array(x_dummy, mask=~mask),
+            linestyle="dotted",
+            color="black",
+        )
+    if np.sum(numeric) > 0:
+        for i, (show, par, unc) in enumerate(
+            zip(numeric[::-1], bestfit[::-1], uncertainty[::-1])
+        ):
+            if show:
+                ax.text(0, i, rf"{par:.2f} $\pm$ {unc:.2f}", ha="center", va="center")
 
     ax.set_xlim([-3, 3])
     ax.set_xlabel(r"$\left(\hat{\theta} - \theta_0\right) / \Delta \theta$")
     ax.set_ylim([-0.5, num_pars - 0.5])
     ax.set_yticks(y_positions)
     ax.set_yticklabels(labels)
-    ax.xaxis.set_minor_locator(mpl.ticker.AutoMinorLocator())  # minor ticks
-    ax.tick_params(axis="both", which="major", pad=8)
-    ax.tick_params(direction="in", top=True, right=True, which="both")
-    fig.set_tight_layout(True)
 
     utils._save_and_close(fig, figure_path, close_figure)
     return fig

diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py
@@ -95,7 +95,12 @@ def test_workspace(mock_validate, mock_build, cli_helpers, tmp_path):
 @mock.patch(
     "cabinetry.fit.fit",
     return_value=fit.FitResults(
-        np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+        np.asarray([1.0]),
+        np.asarray([0.1]),
+        ["label"],
+        ["type"],
+        np.asarray([[1.0]]),
+        1.0,
     ),
     autospec=True,
 )
@@ -109,8 +114,9 @@ def test_fit(mock_utils, mock_fit, mock_pulls, mock_corrmat, tmp_path):
     bestfit = np.asarray([1.0])
     uncertainty = np.asarray([0.1])
     labels = ["label"]
+    types = ["type"]
     corr_mat = np.asarray([[1.0]])
-    fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, 1.0)
+    fit_results = fit.FitResults(bestfit, uncertainty, labels, types, corr_mat, 1.0)
 
     workspace_path = str(tmp_path / "workspace.json")
 
@@ -204,7 +210,12 @@ def test_fit(mock_utils, mock_fit, mock_pulls, mock_corrmat, tmp_path):
 @mock.patch(
     "cabinetry.fit.fit",
     return_value=fit.FitResults(
-        np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+        np.asarray([1.0]),
+        np.asarray([0.1]),
+        ["label"],
+        ["type"],
+        np.asarray([[1.0]]),
+        1.0,
     ),
     autospec=True,
 )
@@ -218,8 +229,9 @@ def test_ranking(mock_utils, mock_fit, mock_rank, mock_vis, tmp_path):
     bestfit = np.asarray([1.0])
     uncertainty = np.asarray([0.1])
     labels = ["label"]
+    types = ["type"]
     corr_mat = np.asarray([[1.0]])
-    fit_results = fit.FitResults(bestfit, uncertainty, labels, corr_mat, 1.0)
+    fit_results = fit.FitResults(bestfit, uncertainty, labels, types, corr_mat, 1.0)
 
     workspace_path = str(tmp_path / "workspace.json")
 
@@ -467,7 +479,12 @@ def test_significance(mock_utils, mock_sig, tmp_path):
 @mock.patch(
     "cabinetry.fit.fit",
     return_value=fit.FitResults(
-        np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+        np.asarray([1.0]),
+        np.asarray([0.1]),
+        ["label"],
+        ["type"],
+        np.asarray([[1.0]]),
+        1.0,
     ),
     autospec=True,
 )
@@ -508,7 +525,12 @@ def test_data_mc(
     config_path = str(tmp_path / "config.yml")
     cli_helpers.write_config(config_path, config)
     fit_results = fit.FitResults(
-        np.asarray([1.0]), np.asarray([0.1]), ["label"], np.asarray([[1.0]]), 1.0
+        np.asarray([1.0]),
+        np.asarray([0.1]),
+        ["label"],
+        ["type"],
+        np.asarray([[1.0]]),
+        1.0,
     )
 
     result = runner.invoke(