Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable template histogram creation and collection to accept list of tasks #421

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion example.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,20 @@
cabinetry.configuration.print_overview(config)

# create template histograms
cabinetry.templates.build(config, method="uproot")
from dask.distributed import Client, LocalCluster, wait

def produce_single_template(template):
cabinetry.templates.build(config, template_list=[template])

template_list = cabinetry.route.required_templates(config)

with LocalCluster(n_workers=2) as cluster:
client = Client(cluster)
wait(client.map(produce_single_template, template_list))

# cabinetry.templates.build(config, template_list=template_list)

raise SystemExit

# perform histogram post-processing
cabinetry.templates.postprocess(config)
Expand Down
107 changes: 64 additions & 43 deletions src/cabinetry/route.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import fnmatch
import logging
from typing import Any, Callable, Dict, List, Literal, Optional
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple

import boost_histogram as bh

Expand Down Expand Up @@ -38,6 +38,12 @@
# (which returns a histogram) into a function that returns None
WrapperFunc = Callable[[UserTemplateFunc], ProcessorFunc]

# type alias for the 4-tuple carrying everything needed to obtain one template
# histogram, in this order: region dict, sample dict, systematic dict, and the
# template variation ("Up" / "Down", or None for the nominal template)
TemplateHistogramInformation = Tuple[
    Dict[str, Any], Dict[str, Any], Dict[str, Any], Optional[Literal["Up", "Down"]]
]
Comment on lines +43 to +45
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably be better to replace this tuple alias with a lightweight class that wraps the same information.



class Router:
"""Holds user-defined processing functions and matches functions to templates.
Expand Down Expand Up @@ -257,33 +263,18 @@ def _find_template_builder_match(
return None


def apply_to_all_templates(
config: Dict[str, Any],
default_func: ProcessorFunc,
*,
match_func: Optional[MatchFunc] = None,
) -> None:
"""Applies the supplied function ``default_func`` to all templates.

The templates are specified by the configuration file. The function takes four
arguments in this order:

- the dict specifying region information
- the dict specifying sample information
- the dict specifying systematic information
- the template being considered: "Up", "Down", or None for the nominal template

In addition it is possible to specify a function that returns custom overrides. If
one is found for a given template, it is used instead of the default.
def required_templates(config: Dict[str, Any]) -> List[TemplateHistogramInformation]:
"""Returns relevant information needed to produce all required template histograms.

Args:
config (Dict[str, Any]): cabinetry configuration
default_func (ProcessorFunc): function to be called for every template by
default
match_func: (Optional[MatchFunc], optional): function that returns user-defined
functions to override the call to ``default_func``, defaults to None (then
it is not used)

Returns:
List[TemplateHistogramInformation]: list of relevant information for each
template histogram
"""
all_templates = []

for region in config["Regions"]:
log.debug(f" in region {region['Name']}")

Expand Down Expand Up @@ -321,22 +312,52 @@ def apply_to_all_templates(
f"{' ' + template if template is not None else ''}"
)

func_override = None
if match_func is not None:
# check whether a user-defined function was registered that
# matches this region-sample-systematic-template
systematic_name = (
systematic["Name"] if template is not None else ""
)
func_override = match_func(
region["Name"], sample["Name"], systematic_name, template
)
if func_override is not None:
# call the user-defined function
log.debug(
f"executing user-defined override {func_override.__name__}"
)
func_override(region, sample, systematic, template)
else:
# call the provided default function
default_func(region, sample, systematic, template)
all_templates.append((region, sample, systematic, template))

return all_templates


def apply_to_templates(
    default_func: ProcessorFunc,  # BREAKING API CHANGE
    template_list: List[TemplateHistogramInformation],
    *,
    match_func: Optional[MatchFunc] = None,
) -> None:
    """Calls a processing function once for every template in ``template_list``.

    Each entry of ``template_list`` is unpacked into four values, which are passed
    on unchanged, in this order, to the function that ends up being called:

    - the dict specifying region information
    - the dict specifying sample information
    - the dict specifying systematic information
    - the template being considered: "Up", "Down", or None for the nominal template

    If ``match_func`` is given, it is queried for every template with the region
    name, the sample name, the systematic name (an empty string for the nominal
    template) and the template. Whenever it returns a function, that function is
    called in place of ``default_func`` for this template.

    Args:
        default_func (ProcessorFunc): function to be called for every template for
            which no override is found
        template_list (List[TemplateHistogramInformation]): list of template
            information to apply the function to
        match_func (Optional[MatchFunc], optional): function that returns
            user-defined functions to override the call to ``default_func``,
            defaults to None (then it is not used)
    """
    for region, sample, systematic, template in template_list:
        # start from the default and swap it out only if an override is registered
        processor = default_func

        if match_func is not None:
            # the nominal template is looked up with an empty systematic name
            lookup_name = "" if template is None else systematic["Name"]
            override = match_func(
                region["Name"], sample["Name"], lookup_name, template
            )
            if override is not None:
                log.debug(f"executing user-defined override {override.__name__}")
                processor = override

        processor(region, sample, systematic, template)
43 changes: 36 additions & 7 deletions src/cabinetry/templates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import pathlib
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional

from cabinetry import route
from cabinetry.templates import builder
Expand All @@ -18,6 +18,7 @@ def build(
*,
method: str = "uproot",
router: Optional[route.Router] = None,
template_list: Optional[List[route.TemplateHistogramInformation]] = None,
) -> None:
"""Produces all required histograms specified by the configuration file.

Expand All @@ -31,6 +32,8 @@ def build(
"uproot"
router (Optional[route.Router], optional): instance of cabinetry.route.Router
that contains user-defined overrides, defaults to None
template_list (Optional[List[route.TemplateHistogramInformation]]): list of
information for templates to process, defaults to None (all templates)
"""
# create an instance of the class doing the template building
histogram_folder = pathlib.Path(config["General"]["HistogramFolder"])
Expand All @@ -44,12 +47,21 @@ def build(
# get a function that can be queried to return a user-defined template builder
match_func = router._find_template_builder_match

route.apply_to_all_templates(
config, template_builder._create_histogram, match_func=match_func
# get list of required templates to process if not provided already
if template_list is None:
template_list = route.required_templates(config)

route.apply_to_templates(
template_builder._create_histogram, template_list, match_func=match_func
)


def collect(config: Dict[str, Any], *, method: str = "uproot") -> None:
def collect(
config: Dict[str, Any],
*,
method: str = "uproot",
template_list: Optional[List[route.TemplateHistogramInformation]] = None,
) -> None:
"""Collects all required histograms specified by the configuration file.

Histograms must already exist, and this collects and saves them in the format used
Expand All @@ -60,6 +72,8 @@ def collect(config: Dict[str, Any], *, method: str = "uproot") -> None:
config (Dict[str, Any]): cabinetry configuration
method (str, optional): backend to use for histogram production, defaults to
"uproot"
template_list (Optional[List[route.TemplateHistogramInformation]]): list of
information for templates to process, defaults to None (all templates)
"""
histogram_folder = pathlib.Path(config["General"]["HistogramFolder"])
general_path = config["General"]["InputPath"]
Expand All @@ -71,15 +85,30 @@ def collect(config: Dict[str, Any], *, method: str = "uproot") -> None:
processor = collector._collector(
histogram_folder, general_path, variation_path, method
)
route.apply_to_all_templates(config, processor)

# get list of required templates to process if not provided already
if template_list is None:
template_list = route.required_templates(config)

route.apply_to_templates(processor, template_list)

def postprocess(config: Dict[str, Any]) -> None:

def postprocess(
config: Dict[str, Any],
template_list: Optional[List[route.TemplateHistogramInformation]] = None,
) -> None:
"""Applies postprocessing to all histograms.

Args:
config (Dict[str, Any]): cabinetry configuration
template_list (Optional[List[route.TemplateHistogramInformation]]): list of
information for templates to process, defaults to None (all templates)
"""
histogram_folder = pathlib.Path(config["General"]["HistogramFolder"])
processor = postprocessor._postprocessor(histogram_folder)
route.apply_to_all_templates(config, processor)

# get list of required templates to process if not provided already
if template_list is None:
template_list = route.required_templates(config)

route.apply_to_templates(processor, template_list)
Loading