Skip to content

Commit 75ff97d

Browse files
ponyisi authored and BenGalewsky committed
Initial attempt at RNTuple support in framework and uproot-raw transformer
1 parent 8037d46 commit 75ff97d

File tree

7 files changed

+30
-15
lines changed

7 files changed

+30
-15
lines changed

Diff for: code_generator_raw_uproot/servicex/raw_uproot_code_generator/request_translator.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ def run_single_query(file_path, query):
153153
if 'cut' in sanitized_args:
154154
sanitized_args.pop('cut')
155155
arr = t.arrays(language=lang, entry_stop=0, **sanitized_args)
156-
rv_arrays_trees[outtreename] = (None, {{_: arr[_].type for _ in arr.fields}})
156+
rv_arrays_trees[outtreename] = (None, arr.layout)
157157
else:
158158
histograms = query['copy_histograms']
159159
keys = fl.keys(filter_name=histograms, cycle=False)

Diff for: code_generator_raw_uproot/servicex/templates/transform_single_file.py

+21-7
Original file line number | Diff line number | Diff line change
@@ -26,20 +26,34 @@ def transform_single_file(file_path: str, output_path: Path, output_format: str)
2626

2727
ttime = time.time()
2828

29-
if output_format == 'root-file':
29+
if output_format in ('root-file', 'root-rntuple'):
3030
import uproot
3131
etime = time.time()
3232
# opening the file with open() is a workaround for a bug handling multiple colons
3333
# in the filename in uproot 5.3.9
3434
with open(output_path, 'b+w') as wfile:
3535
with uproot.recreate(wfile) as writer:
3636
for k, v in awkward_array_dict.items():
37-
if v[0] is not None:
38-
writer[k] = {field: v[0][field] for field in
39-
v[0].fields} if v[0].fields \
40-
else v[0]
41-
else:
42-
writer.mktree(k, v[1])
37+
if output_format == 'root-file':
38+
if v[0] is not None:
39+
writer[k] = {field: v[0][field] for field in
40+
v[0].fields}
41+
else:
42+
writer.mktree(k, dict(zip(v[1].form.columns(),
43+
v[1].form.column_types())))
44+
else: # RNTuple
45+
if v[0] is not None:
46+
# Work around a limitation in uproot 5.6.0
47+
# If a cut is specified, we'll get ListArrays which can't be
48+
# written via uproot. Convert them to ListOffsetArrays
49+
# Assume the ListArrays are only at top level
50+
warr = ak.zip({_: v[0][_].layout.to_ListOffsetArray64()
51+
if isinstance(v[0][_].layout, ak.contents.ListArray)
52+
else v[0][_]
53+
for _ in v[0].fields}, depth_limit=1)
54+
writer.mkrntuple(k, warr)
55+
else:
56+
writer.mkrntuple(k, v[1].form)
4357
for k, v in histograms.items():
4458
writer[k] = v
4559
wtime = time.time()

Diff for: code_generator_raw_uproot/tests/test_src.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ def test_generate_code():
5050
'filter_name': ['lbn']},
5151
{'copy_histograms': 'CutBookkeeper*'}
5252
])
53-
expected_hash = "6f8ac79962ef753d3e7fd6161cba6fe8"
53+
expected_hash = "9d7f01cdc875549daf3eed81a502147a"
5454
result = translator.generate_code(query, tmpdirname)
5555

5656
# is the generated code at least syntactically valid Python?

Diff for: code_generator_raw_uproot/transformer_capabilities.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"name": "Uproot transformer using native uproot arguments",
33
"description": "Extracts data from flat ntuple style root files.",
44
"limitations": "Would be good to note what isn't implemented",
5-
"file-formats": ["parquet"],
5+
"file-formats": ["parquet", "root-file", "root-rntuple"],
66
"stats-parser": "RawUprootStats",
77
"language": "python",
88
"command": "/generated/transform_single_file.py"

Diff for: servicex_app/servicex_app/resources/transformation/submit.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,8 @@ def make_api(cls, rabbitmq_adaptor, object_store,
7676
TransformRequest.VOLUME_DEST
7777
])
7878
cls.parser.add_argument(
79-
'result-format', choices=['arrow', 'parquet', 'root-file'], default='arrow'
79+
'result-format', choices=['parquet', 'root-file',
80+
'root-rntuple'], default='parquet'
8081
)
8182
return cls
8283

Diff for: transformer_sidecar/src/transformer_sidecar/transformer_argument_parser.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ def __init__(self, description="ServiceX Transformer"):
5656
default=None, help='Local directory to output results')
5757

5858
self.add_argument('--result-format', dest='result_format', action='store',
59-
default='arrow', help='arrow, parquet, root-file',
60-
choices=['arrow', 'parquet', 'root-file'])
59+
default='parquet', help='parquet, root-file, root-rntuple',
60+
choices=['parquet', 'root-file', 'root-rntuple'])
6161

6262
self.add_argument('--rabbit-uri', dest="rabbit_uri", action='store',
6363
default='host.docker.internal')

Diff for: transformer_sidecar/tests/test_transformer_argument_parser.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def test_parse(self, mocker):
4444
"--path", "/foo/bar",
4545
"--limit", "10",
4646
'--result-destination', 'object-store',
47-
'--result-format', 'arrow',
47+
'--result-format', 'parquet',
4848
'--rabbit-uri', "http://rabbit.org",
4949
'--request-id', "123-45-678"
5050
]
@@ -53,7 +53,7 @@ def test_parse(self, mocker):
5353
assert args.path == '/foo/bar'
5454
assert args.limit == 10
5555
assert args.result_destination == 'object-store'
56-
assert args.result_format == 'arrow'
56+
assert args.result_format == 'parquet'
5757
assert args.rabbit_uri == "http://rabbit.org"
5858
assert args.request_id == "123-45-678"
5959

0 commit comments

Comments
 (0)