diff --git a/.gitmodules b/.gitmodules index 371fe6e7..4cce2c4e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,4 @@ -[submodule "pybind11"] - path = extern/pybind11 - url = ../../pybind/pybind11.git -[submodule "extern/boosthistogram"] +[submodule "extern/histogram"] path = extern/histogram url = ../../boostorg/histogram.git [submodule "extern/core"] diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index da1e152c..9dc5b906 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -28,7 +28,7 @@ from boost_histogram import _core from .axestuple import AxesTuple -from .axis import Axis +from .axis import Axis, Variable from .enum import Kind from .storage import Double, Storage from .typing import Accumulator, ArrayLike, CppHistogram @@ -827,6 +827,7 @@ def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: slices: list[_core.algorithm.reduce_command] = [] pick_each: dict[int, int] = {} pick_set: dict[int, list[int]] = {} + reduced: CppHistogram | None = None # Compute needed slices and projections for i, ind in enumerate(indexes): @@ -855,16 +856,23 @@ def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: # This ensures that callable start/stop are handled start, stop = self.axes[i]._process_loc(ind.start, ind.stop) + groups = [] if ind != slice(None): merge = 1 if ind.step is not None: - if hasattr(ind.step, "factor"): + if getattr(ind.step, "factor", None) is not None: merge = ind.step.factor + elif ( + hasattr(ind.step, "group_mapping") + and (tmp_groups := ind.step.group_mapping(self.axes[i])) + is not None + ): + groups = tmp_groups elif callable(ind.step): if ind.step is sum: integrations.add(i) else: - raise RuntimeError("Full UHI not supported yet") + raise NotImplementedError if ind.start is not None or ind.stop is not None: slices.append( @@ -872,7 +880,8 @@ def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: i, start, stop, _core.algorithm.slice_mode.crop ) ) - continue + if len(groups) == 0: + continue else: raise IndexError( "The third argument to a slice must be rebin or projection" @@ -880,13 +889,49 @@ def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: assert isinstance(start, int) assert isinstance(stop, int) - slices.append(_core.algorithm.slice_and_rebin(i, start, stop, merge)) + # rebinning with factor + if len(groups) == 0: + slices.append( + _core.algorithm.slice_and_rebin(i, start, stop, merge) + ) + # rebinning with groups + elif len(groups) != 0: + if not reduced: + reduced = self._hist + axes = [reduced.axis(x) for x in range(reduced.rank())] + reduced_view = reduced.view(flow=True) + new_axes_indices = [axes[i].edges[0]] + + j = 0 + for group in groups: + new_axes_indices += [axes[i].edges[j + group]] + j += group + + variable_axis = Variable( + new_axes_indices, metadata=axes[i].metadata + ) + axes[i] = variable_axis._ax + + logger.debug("Axes: %s", axes) + + new_reduced = reduced.__class__(axes) + new_view = new_reduced.view(flow=True) + + j = 1 + for new_j, group in enumerate(groups): + for _ in range(group): + pos = [slice(None)] * (i) + new_view[(*pos, new_j + 1, ...)] += reduced_view[ # type: ignore[arg-type] + (*pos, j, ...) # type: ignore[arg-type] + ] + j += 1 + + reduced = new_reduced # Will be updated below - if slices or pick_set or pick_each or integrations: + if (slices or pick_set or pick_each or integrations) and not reduced: reduced = self._hist - else: - logger.debug("Reduce actions are all empty, just making a copy") + elif not reduced: reduced = copy.copy(self._hist) if pick_each: diff --git a/src/boost_histogram/tag.py b/src/boost_histogram/tag.py index 0fcfb9f7..fa4154e1 100644 --- a/src/boost_histogram/tag.py +++ b/src/boost_histogram/tag.py @@ -4,7 +4,10 @@ import copy from builtins import sum -from typing import TypeVar +from typing import TYPE_CHECKING, Sequence, TypeVar + +if TYPE_CHECKING: + from uhi.typing.plottable import PlottableAxis from ._internal.typing import AxisLike @@ -108,12 +111,40 @@ def __call__(self, axis: AxisLike) -> int: # noqa: ARG002 class rebin: - __slots__ = ("factor",) - - def __init__(self, value: int) -> None: - self.factor = value + __slots__ = ( + "factor", + "groups", + ) + + def __init__( + self, + factor: int | None = None, + *, + groups: Sequence[int] | None = None, + ) -> None: + if not sum(i is None for i in [factor, groups]) == 1: + raise ValueError("Exactly one, a factor or groups should be provided") + self.factor = factor + self.groups = groups def __repr__(self) -> str: - return f"{self.__class__.__name__}({self.factor})" - - # TODO: Add __call__ to support UHI + repr_str = f"{self.__class__.__name__}" + args: dict[str, int | Sequence[int] | None] = { + "factor": self.factor, + "groups": self.groups, + } + for k, v in args.items(): + if v is not None: + return_str = f"{repr_str}({k}={v})" + break + return return_str + + def group_mapping(self, axis: PlottableAxis) -> Sequence[int]: + if self.groups is not None: + if sum(self.groups) != len(axis): + msg = f"The sum of the groups ({sum(self.groups)}) must be equal to the number of bins in the axis ({len(axis)})" + raise ValueError(msg) + return self.groups + if self.factor is not None: + return [self.factor] * len(axis) + raise ValueError("No rebinning factor or groups provided") diff --git a/tests/test_histogram.py b/tests/test_histogram.py index acaf9bbd..e5eae7b5 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -632,13 +632,17 @@ def test_shrink_1d(): def test_rebin_1d(): h = bh.Histogram(bh.axis.Regular(20, 1, 5)) - h.fill(1.1) + h.fill([1.1, 2.2, 3.3, 4.4]) hs = h[{0: slice(None, None, bh.rebin(4))}] - assert_array_equal(hs.view(), [1, 0, 0, 0, 0]) + assert_array_equal(hs.view(), [1, 1, 1, 0, 1]) hs = h[{0: bh.rebin(4)}] - assert_array_equal(hs.view(), [1, 0, 0, 0, 0]) + assert_array_equal(hs.view(), [1, 1, 1, 0, 1]) + + hs = h[{0: bh.rebin(groups=[1, 2, 3, 14])}] + assert_array_equal(hs.view(), [1, 0, 0, 3]) + assert_array_equal(hs.axes.edges[0], [1.0, 1.2, 1.6, 2.2, 5.0]) def test_shrink_rebin_1d(): @@ -659,8 +663,60 @@ def test_rebin_nd(): assert h[{1: s[:: bh.rebin(2)]}].axes.size == (20, 15, 40) assert h[{2: s[:: bh.rebin(2)]}].axes.size == (20, 30, 20) + assert h[{0: s[:: bh.rebin(groups=[1, 2, 17])]}].axes.size == (3, 30, 40) + assert h[{1: s[:: bh.rebin(groups=[1, 2, 27])]}].axes.size == (20, 3, 40) + assert h[{2: s[:: bh.rebin(groups=[1, 2, 37])]}].axes.size == (20, 30, 3) + assert np.all( + np.isclose( + h[{0: s[:: bh.rebin(groups=[1, 2, 17])]}].axes[0].edges, + [1.0, 1.1, 1.3, 3.0], + ) + ) + assert np.all( + np.isclose( + h[{1: s[:: bh.rebin(groups=[1, 2, 27])]}].axes[1].edges, + [1.0, 1.06666667, 1.2, 3.0], + ) + ) + assert np.all( + np.isclose( + h[{2: s[:: bh.rebin(groups=[1, 2, 37])]}].axes[2].edges, + [1.0, 1.05, 1.15, 3.0], + ) + ) + assert h[{0: s[:: bh.rebin(2)], 2: s[:: bh.rebin(2)]}].axes.size == (10, 30, 20) + assert h[ + {0: s[:: bh.rebin(groups=[1, 2, 17])], 2: s[:: bh.rebin(groups=[1, 2, 37])]} + ].axes.size == (3, 30, 3) + assert np.all( + np.isclose( + h[ + { + 0: s[:: bh.rebin(groups=[1, 2, 17])], + 2: s[:: bh.rebin(groups=[1, 2, 37])], + } + ] + .axes[0] + .edges, + [1.0, 1.1, 1.3, 3], + ) + ) + assert np.all( + np.isclose( + h[ + { + 0: s[:: bh.rebin(groups=[1, 2, 17])], + 2: s[:: bh.rebin(groups=[1, 2, 37])], + } + ] + .axes[2] + .edges, + [1.0, 1.05, 1.15, 3.0], + ) + ) + assert h[{1: s[:: bh.sum]}].axes.size == (20, 40) assert h[{1: bh.sum}].axes.size == (20, 40) diff --git a/tests/test_histogram_indexing.py b/tests/test_histogram_indexing.py index a86838ed..7dd94ea8 100644 --- a/tests/test_histogram_indexing.py +++ b/tests/test_histogram_indexing.py @@ -240,7 +240,7 @@ def test_repr(): assert repr(bh.overflow + 1) == "overflow + 1" assert repr(bh.overflow - 1) == "overflow - 1" - assert repr(bh.rebin(2)) == "rebin(2)" + assert repr(bh.rebin(2)) == "rebin(factor=2)" # Was broken in 0.6.1