Skip to content
This repository has been archived by the owner on Mar 31, 2019. It is now read-only.

Commit

Permalink
implemented foreach-style iteration
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Jul 28, 2017
1 parent 44bd081 commit b484f0a
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 11 deletions.
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ Let's illustrate the PLUR concept and its Python/Numpy implementation with an ex

## Particle physics example

To follow along, check out [Revision 167](https://github.com/diana-hep/plur/releases/tag/rev167) and
To follow along, check out [Revision 193](https://github.com/diana-hep/plur/releases/tag/rev193) and

```bash
python setup.py install --user
Expand Down Expand Up @@ -293,7 +293,7 @@ On my laptop, this took 25 seconds. Only five of the thirty arrays were actually

As an alternative to passing proxy objects in dynamic Python, we could translate the Python code to pass integer indexes and interpret them correctly. This involves something like a compiler pass, propagating PLUR data types through the code to insert index interpretations at the appropriate places, which can be performed rigorously at the level of [abstract syntax trees](https://en.wikipedia.org/wiki/Abstract_syntax_tree).

This PLUR implementation has experimental support for code transformation, though the interface is currently rough (no error checking!). We can't use foreach-style loops yet, but eventually we'll be able to put exactly the same code that works with proxies into the code transformation tool and get a large speedup for free.
This PLUR implementation has experimental support for code transformation, though the interface is currently rough (no error checking!). However, we can already put exactly the same code that works with proxies into the code transformation tool and get a large speedup for free.

Here's an illustration:

Expand All @@ -304,11 +304,9 @@ from plur.compile import local

def doit(events):
# Sums the momentum magnitude sqrt(px**2 + py**2 + pz**2) over every muon of every event.
# NOTE(review): this span is a rendered diff with indentation stripped; it shows BOTH
# sides of the change, so it is not meant to be run as one function body.
psum = 0.0
# NOTE(review): index-based form -- presumably the five lines removed by this commit.
for i in range(len(events)):
for j in range(len(events[i].muons)):
psum += math.sqrt(events[i].muons[j].px**2 +
events[i].muons[j].py**2 +
events[i].muons[j].pz**2)
# NOTE(review): foreach-style form -- presumably the three lines added by this commit.
for event in events:
for muon in event.muons:
psum += math.sqrt(muon.px**2 + muon.py**2 + muon.pz**2)
return psum

fcn, arrayparams = local(doit, arrays2type(arrays, "events"), environment={"math": math})
Expand Down Expand Up @@ -389,4 +387,5 @@ To a user, this would require a combined query: an SQL part (for the database) f
* Maybe require Femtocode-style constraints on list indexes and union members to eliminate this type of runtime error.
* Simple extension types, such as strings (`List(uint8)`), nullable/optional (`List(X)`), and pointers (`int64` with a list reference).
* Use pointers as event lists and database-style indexes: essential for query engine.
* Streaming iteration over Lists using dynamically-generated partitions.
* Integrate with ROOT, zero-copy interpreting internal TBuffer data as PLUR data (requires ROOT updates).
36 changes: 32 additions & 4 deletions plur/compile/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,10 @@ def subunionop(tpe, node):
assert isinstance(node.args[0].slice, ast.Index)
assert isinstance(node.args[0].ctx, ast.Load)

current = node.args[0]
plusone = generate(None, "offset[i + 1]", offset=node.args[0].value, i=node.args[0].slice.value)

return generate(tpe, "plusone - current", plusone=plusone, current=current)
return generate(tpe, "offset[i + 1] - current",
offset=node.args[0].value,
i=node.args[0].slice.value,
current=node.args[0])

elif hasattr(node.args[0], "plurtype") and isinstance(node.args[0].plurtype, Union):
return node.args[0]
Expand Down Expand Up @@ -430,6 +430,34 @@ def subunionop(tpe, node):
# FloorDiv ()

# For ("target", "iter", "body", "orelse")
def do_For(node, symboltypes, environment, enclosedfcns, encloseddata, zeros, recurse, colname, unionop):
    """Transform an ``ast.For`` node, rewriting iteration over a PLUR ``List``.

    The iterable and the target are transformed first. If the transformed
    iterable carries a ``plurtype`` that is a ``List`` and the target is a
    plain name being stored to, the iterable is replaced by a ``range(...)``
    over integer indexes and the loop variable is recorded in ``symboltypes``
    with the list's element type (``tpe.of``). Any other loop is left as it
    is, apart from the recursive transformation of its parts.

    Parameters:
        node: the ``ast.For`` node; it is modified in place.
        symboltypes: mapping from variable name to PLUR type; updated with the
            loop variable when the iterable is a ``List``.
        recurse: callable applied to ``iter``, ``target``, ``body`` and
            ``orelse`` (in that order) to transform them.
        colname: callable mapping ``tpe.column`` to the identifier used for
            the offset array.
        environment, enclosedfcns, encloseddata, zeros, unionop: not used by
            this handler; presumably kept so every ``do_*`` handler shares
            one signature (confirm against the dispatcher).

    Returns:
        The same ``node`` object, after modification.
    """
    node.iter = recurse(node.iter)
    node.target = recurse(node.target)

    # Keep this as one short-circuiting chain: ``plurtype`` is only read once
    # ``hasattr`` has confirmed that it exists.
    if (hasattr(node.iter, "plurtype")
            and isinstance(node.iter.plurtype, List)
            and isinstance(node.target, ast.Name)
            and isinstance(node.target.ctx, ast.Store)):
        tpe = node.iter.plurtype

        if isinstance(node.iter, ast.Num):
            # A literal-number iterable is only expected to be 0 here; the
            # loop then runs over range(offset[1]) of this list's own column.
            assert node.iter.n == 0
            node.iter = generate(tpe, "range(offset[1])",
                                 offset=ast.Name(colname(tpe.column), ast.Load()))

        else:
            # Otherwise the iterable must be a loaded ``name[index]``
            # expression; it supplies the start of the range, and the same
            # array at ``index + 1`` supplies the stop.
            assert isinstance(node.iter, ast.Subscript)
            assert isinstance(node.iter.value, ast.Name)
            assert isinstance(node.iter.slice, ast.Index)
            assert isinstance(node.iter.ctx, ast.Load)

            node.iter = generate(tpe, "range(current, offset[i + 1])",
                                 offset=node.iter.value,
                                 i=node.iter.slice.value,
                                 current=node.iter)

        # Bind the loop variable's type before the body is transformed, so
        # uses of the variable inside the body resolve to the element type.
        symboltypes[node.target.id] = tpe.of

    node.body = recurse(node.body)
    node.orelse = recurse(node.orelse)

    return node

# FunctionDef ("name", "args", "body", "decorator_list") # Py2
# FunctionDef ("name", "args", "body", "decorator_list", "returns") # Py3
Expand Down
28 changes: 28 additions & 0 deletions tests/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,34 @@ def f(x, i, j):
same([T([], []), T([1, 2], [1, 2]), T([3, 4, 5], [3, 4, 5])], lambda x, i: len(x[i].one), [0, 1, 2])
same([T([1], [1]), T2([1, 2], [1, 2]), T([3, 4, 5], [3, 4, 5])], lambda x, i: len(x[i].one), [0, 1, 2])

####### iter

# Case 1: foreach over a flat list of numbers, accumulating each element.
# ``dummy`` is never read by the body.
def doit(x, dummy):
out = 0.0
for y in x:
out += y
return out

# NOTE(review): presumably ``same`` checks that the transformed function agrees with
# plain Python on this data -- confirm against the helper's definition.
same([1, 2, 3, 4, 5], doit, [0])

# Case 2: nested foreach over a list of lists; the data includes an empty inner list.
def doit(x, dummy):
out = 0.0
for y in x:
for z in y:
out += z
return out

same([[], [1, 2], [3, 4, 5]], doit, [0])

# Case 3: foreach over a list-valued attribute (``.one``) of each element of the outer list.
def doit(x, dummy):
out = 0.0
for y in x:
for z in y.one:
out += z
return out

same([T([], 0), T([1, 2], 0), T([3, 4, 5], 0)], doit, [0])

def test_local(self):
data = [[], [1, 2], [3, 4, 5]]
arrays = toarrays("prefix", data)
Expand Down

0 comments on commit b484f0a

Please sign in to comment.