Skip to content

Commit

Permalink
Merge pull request #73 from FAST-HEP/BK_ND_jagged_arrays
Browse files Browse the repository at this point in the history
Add support for ND jagged arrays in expressions
  • Loading branch information
benkrikler authored Oct 3, 2019
2 parents fbe0da3 + 0a9abe8 commit dcbedb5
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 22 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Removed

## [0.14.0] - 2019-10-03
### Added
- Support for ND jagged arrays in expressions, PR #73
- Automatic conversion of ObjectArrays from uproot to JaggedArrays, PR #73

## [0.13.4] - 2019-09-21
### Changed
- Fixed interpretation of user-defined variables for uproot, issue #67, PR #71 [@benkrikler](https://github.com/benkrikler)
Expand Down
45 changes: 31 additions & 14 deletions fast_carpenter/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,36 @@ def get_branches(cut, valid):
return branches


def deconstruct_jaggedness(array, counts):
    """Peel every jagged layer off ``array``, collecting the counts of each.

    While ``array`` is an awkward array with content, it is compacted, its
    ``counts`` are inserted at the front of ``counts`` and the walk continues
    on its ``content``.  The list therefore ends up ordered innermost layer
    first, outermost layer last.

    ``counts`` is modified in place and is also returned.

    Returns:
        tuple: ``(innermost_content, counts)``.
    """
    while isinstance(array, awkward.array.base.AwkwardArrayWithContent):
        compacted = array.compact()
        counts.insert(0, compacted.counts)
        array = compacted.content
    return array, counts


def reconstruct_jaggedness(array, counts):
    """Wrap ``array`` in one jagged layer per entry of ``counts``.

    The entries are applied in list order, each one wrapping the result of
    the previous step with ``awkward.JaggedArray.fromcounts``.  An empty
    ``counts`` leaves ``array`` untouched.
    """
    rebuilt = array
    for layer_counts in counts:
        rebuilt = awkward.JaggedArray.fromcounts(layer_counts, rebuilt)
    return rebuilt


class TreeToDictAdaptor():
"""
Make an uproot tree look like a dict for numexpr
"""
def __init__(self, tree, alias_dict):
self.tree = tree
self.starts = None
self.stops = None
self.counts = None
self.aliases = alias_dict

def __getitem__(self, item):
if item in constants:
return constants[item]
full_item = self.aliases.get(item, item)
array = self.tree.array(full_item)
starts = getattr(array, "starts", None)
if starts is not None:
self.set_starts_stop(starts, array.stops)
return array.content
array = self.strip_jaggedness(array)
return array

def __contains__(self, item):
Expand All @@ -67,13 +78,20 @@ def __iter__(self):
for i in self.tree:
yield i

def set_starts_stop(self, starts, stops):
if self.starts is not None:
if any(self.starts != starts) or any(self.stops != stops):
raise RuntimeError("Mismatched starts and stops")
def strip_jaggedness(self, array):
    """Strip all jaggedness from ``array`` and check it against earlier arrays.

    The first array passed through this adaptor fixes the reference
    jaggedness, stored in ``self.counts``.  Every later array must have the
    same jaggedness: the same number of nested layers and equal counts at
    each layer.

    Returns:
        The flattened content produced by ``deconstruct_jaggedness``.

    Raises:
        RuntimeError: if the jaggedness of ``array`` differs from the one
            already recorded on this adaptor.
    """
    array, new_counts = deconstruct_jaggedness(array, counts=[])
    if self.counts is None:
        self.counts = new_counts
        return array

    # zip() stops at the shorter list, so arrays with a different nesting
    # depth (including a flat array versus a jagged one) would otherwise
    # slip through the per-layer comparison unnoticed.
    same_depth = len(self.counts) == len(new_counts)
    if not same_depth or not all(
        np.array_equal(known, new) for known, new in zip(self.counts, new_counts)
    ):
        raise RuntimeError("Operation using arrays with different jaggedness")
    return array

def apply_jaggedness(self, array):
if self.counts is None:
return array
result = reconstruct_jaggedness(array, self.counts)
return result


attribute_re = re.compile(r"([a-zA-Z]\w*)\s*\.\s*(\w+)")
Expand All @@ -95,6 +113,5 @@ def evaluate(tree, expression):
cleaned_expression, alias_dict = preprocess_expression(expression)
adaptor = TreeToDictAdaptor(tree, alias_dict)
result = numexpr.evaluate(cleaned_expression, local_dict=adaptor)
if adaptor.starts is not None:
result = awkward.JaggedArray(adaptor.starts, adaptor.stops, result)
result = adaptor.apply_jaggedness(result)
return result
26 changes: 22 additions & 4 deletions fast_carpenter/tree_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,41 @@
minimal coding on my side...
"""
import uproot
from uproot.interp.objects import asgenobj
from uproot.interp.jagged import asjagged
from uproot.interp.numerical import asdtype
import copy
import awkward


def recursive_type_wrap(array):
    """Build an interpretation that mirrors the nesting of ``array``.

    Each ``awkward.JaggedArray`` layer becomes an ``asjagged`` wrapper around
    the interpretation of its content; anything else ends the recursion with
    ``asdtype(array.dtype.fields)``.
    """
    if not isinstance(array, awkward.JaggedArray):
        return asdtype(array.dtype.fields)
    inner = recursive_type_wrap(array.content)
    return asjagged(inner)


class asgenobj_then_jagged():
    """Proxy around an interpretation that returns its result as a JaggedArray.

    ``finalize`` calls the wrapped object's ``finalize`` and passes the result
    through ``awkward.JaggedArray.fromiter``.  Any attribute not found on the
    proxy itself is looked up on the wrapped object.
    """

    def __init__(self, original):
        self.wrapping = original

    def finalize(self, *args, **kwargs):
        """Finalize with the wrapped object, then convert to a JaggedArray."""
        result = self.wrapping.finalize(*args, **kwargs)
        return awkward.JaggedArray.fromiter(result)

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails.  If "wrapping"
        # itself is missing (e.g. copy/pickle probing an instance created
        # without __init__), delegating would re-enter this method forever.
        if attr == "wrapping":
            raise AttributeError(attr)
        return getattr(self.wrapping, attr)


def wrapped_interpret(branch, *args, **kwargs):
from uproot.interp.auto import interpret
result = interpret(branch, *args, **kwargs)
if result:
if isinstance(result, asgenobj):
result = asgenobj_then_jagged(result)
return result

if isinstance(branch, WrappedTree.FakeBranch):
if isinstance(branch._values, awkward.JaggedArray):
return asjagged(asdtype(branch._values.content.dtype.fields))
else:
return branch._values
return recursive_type_wrap(branch._values)

return None

Expand Down
2 changes: 1 addition & 1 deletion fast_carpenter/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ def split_version(version):
return tuple(result)


__version__ = '0.13.4'
__version__ = '0.14.0'
version_info = split_version(__version__) # noqa
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.13.4
current_version = 0.14.0
commit = True
tag = False

Expand Down
57 changes: 55 additions & 2 deletions tests/test_expressions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import pytest
import numpy as np
from awkward import JaggedArray
from fast_carpenter import expressions


Expand All @@ -21,10 +23,24 @@ def test_evaluate(wrapped_tree):
assert all(mu_pt.counts == Muon_py.counts)


def test_evaluate_bool(wrapped_tree):
all_true = expressions.evaluate(wrapped_tree, "Muon_Px == Muon_Px")
def test_evaulate_matches_array(wrapped_tree):
    """An evaluated cut expression equals the same cut applied to the array."""
    expected_mask = wrapped_tree.array("Muon_Px") < 0.3
    evaluated_mask = expressions.evaluate(wrapped_tree, "Muon_Px < 0.3")
    matches = evaluated_mask == expected_mask
    assert matches.all().all()


def test_evaluate_bool(full_wrapped_tree):
all_true = expressions.evaluate(full_wrapped_tree, "Muon_Px == Muon_Px")
assert all(all_true.all())

mu_cut = expressions.evaluate(full_wrapped_tree, "NMuon > 1")
ele_cut = expressions.evaluate(full_wrapped_tree, "NElectron > 1")
jet_cut = expressions.evaluate(full_wrapped_tree, "NJet > 1")
mu_px = expressions.evaluate(full_wrapped_tree, "Muon_Px > 0.3")
mu_px = mu_px.pad(2)[:, 1]
combined = mu_cut & (ele_cut | jet_cut) & mu_px
assert np.count_nonzero(combined) == 2


def test_evaluate_dot(wrapped_tree):
wrapped_tree.new_variable("Muon.Px", wrapped_tree.array("Muon_Px"))
Expand All @@ -38,3 +54,40 @@ def test_constants(infile):

ninf_1_or_fewer_mu = expressions.evaluate(infile, "where(NMuon > 1, NMuon, -inf)")
assert np.count_nonzero(np.isfinite(ninf_1_or_fewer_mu)) == 289


def test_3D_jagged(wrapped_tree):
    """Expressions work on doubly-jagged arrays and reject mismatched ones."""
    fake_3d = [[np.arange(i + 1) + j
                for i in range(j % 3)]
               for j in range(len(wrapped_tree))]
    fake_3d = JaggedArray.fromiter(fake_3d)
    wrapped_tree.new_variable("Fake3D", fake_3d)
    assert isinstance(fake_3d.count(), JaggedArray)
    assert all((fake_3d.copy().count() == fake_3d.count()).all())

    aliased = expressions.evaluate(wrapped_tree, "Fake3D")
    assert (aliased == fake_3d).all().all().all()

    # Both spellings of "twice the array" must give the same jagged result.
    for expression in ("Fake3D * 2", "Fake3D + Fake3D"):
        doubled = expressions.evaluate(wrapped_tree, expression)
        assert (doubled == fake_3d * 2).all().all().all()
        assert len(doubled[0, :, :]) == 0
        assert doubled[1, 0, :] == [2]
        assert doubled[2, 0, :] == [4]
        assert all(doubled[2, 1, :] == [4, 6])

    fake_3d_2 = [[np.arange(i + 3) + j
                  for i in range(j % 2)]
                 for j in range(len(wrapped_tree))]
    fake_3d_2 = JaggedArray.fromiter(fake_3d_2)
    wrapped_tree.new_variable("SecondFake3D", fake_3d_2)

    # match= checks the exception's own message rather than the string form
    # of the ExceptionInfo wrapper.
    with pytest.raises(RuntimeError, match="different jaggedness"):
        expressions.evaluate(wrapped_tree, "SecondFake3D + Fake3D")

0 comments on commit dcbedb5

Please sign in to comment.