Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support bin width normalization in data/MC plots #452

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Regions:
- Name: "Signal_region"
Variable: "jet_pt"
Filter: "lep_charge > 0"
- Binning: [200, 300, 400, 500, 600]
+ Binning: [200, 300, 400, 600]

Samples:
- Name: "Data"
Expand Down
5 changes: 4 additions & 1 deletion example.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,8 @@
cabinetry.tabulate.yields(prediction_postfit, data)

# visualize pre- and post-fit distributions
- cabinetry.visualize.data_mc(prediction_prefit, data, config=config)
+ plot_options = {"Signal_region": {"normalize_binwidth": (100, "GeV")}}
+ cabinetry.visualize.data_mc(
+ prediction_prefit, data, config=config, plot_options=plot_options
+ )
cabinetry.visualize.data_mc(prediction_postfit, data, config=config)
6 changes: 6 additions & 0 deletions src/cabinetry/visualize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def data_mc(
log_scale_x: bool = False,
channels: Optional[Union[str, List[str]]] = None,
colors: Optional[Dict[str, str]] = None,
plot_options: Optional[Dict[str, dict]] = None,
close_figure: bool = False,
save_figure: bool = True,
) -> Optional[List[Dict[str, Any]]]:
Expand Down Expand Up @@ -189,6 +190,8 @@ def data_mc(
or list of names to include, defaults to None (uses all channels)
colors (Optional[Dict[str, str]], optional): map of sample names and colors to
use in plot, defaults to None (uses default colors)
plot_options (Optional[Dict[str, dict]], optional): plotting configuration
per region, defaults to None (no additional configuration)
close_figure (bool, optional): whether to close each figure, defaults to False
(enable when producing many figures to avoid memory issues, prevents
automatic rendering in notebooks)
Expand All @@ -213,6 +216,8 @@ def data_mc(
f"colors need to be provided for all samples, missing for {c_missing}"
)

plot_options = plot_options or {} # no additional plot options by default

# channels to include in plot, with optional filtering applied
filtered_channels = model_utils._filter_channels(model_prediction.model, channels)

Expand Down Expand Up @@ -283,6 +288,7 @@ def data_mc(
log_scale_x=log_scale_x,
label=label,
colors=colors,
plot_options=plot_options.get(channel_name, None),
close_figure=close_figure,
)
figure_dict_list.append({"figure": fig, "region": channel_name})
Expand Down
24 changes: 21 additions & 3 deletions src/cabinetry/visualize/plot_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def data_mc(
log_scale_x: bool = False,
label: str = "",
colors: Optional[Dict[str, str]] = None,
plot_options: Optional[Dict[str, Any]] = None,
close_figure: bool = False,
) -> mpl.figure.Figure:
"""Draws a data/MC histogram with uncertainty bands and ratio panel.
Expand All @@ -51,6 +52,8 @@ def data_mc(
label (str, optional): label written on the figure, defaults to ""
colors (Optional[Dict[str, str]], optional): map of sample names and colors to
use in plot, defaults to None (uses default colors)
plot_options (Optional[Dict[str, Any]], optional): plotting configuration for
this figure, defaults to None (no additional configuration)
close_figure (bool, optional): whether to close each figure immediately after
saving it, defaults to False (enable when producing many figures to avoid
memory issues, prevents rendering in notebooks)
Expand All @@ -61,15 +64,26 @@ def data_mc(
Returns:
matplotlib.figure.Figure: the data/MC figure
"""
plot_options = plot_options or {} # no additional plot options by default

if "normalize_binwidth" in plot_options:
rescaling_factor, unit = plot_options["normalize_binwidth"]
bin_width_norm = (bin_edges[1:] - bin_edges[:-1]) / rescaling_factor
else:
unit = None
bin_width_norm = np.ones_like(bin_edges[:-1])

total_model_unc /= bin_width_norm # apply bin width normalization

mc_histograms_yields = []
mc_labels = []
for h in histogram_dict_list:
if h["isData"]:
- data_histogram_yields = h["yields"]
+ data_histogram_yields = h["yields"] / bin_width_norm
data_histogram_stdev = np.sqrt(data_histogram_yields)
data_label = h["label"]
else:
- mc_histograms_yields.append(h["yields"])
+ mc_histograms_yields.append(h["yields"] / bin_width_norm)
mc_labels.append(h["label"])

mpl.style.use(MPL_STYLE)
Expand Down Expand Up @@ -229,8 +243,12 @@ def data_mc(
all_containers, all_labels, frameon=False, fontsize="large", loc="upper right"
)

vertical_axis_label = "events"
if unit is not None: # bin width normalization
vertical_axis_label += f" / {rescaling_factor} {unit}"

ax1.set_xlim(bin_edges[0], bin_edges[-1])
- ax1.set_ylabel("events")
+ ax1.set_ylabel(vertical_axis_label)
ax1.set_xticklabels([])
ax1.set_xticklabels([], minor=True)
ax1.tick_params(axis="both", which="major", pad=8) # tick label - axis padding
Expand Down
Loading