From 83a1f51bc7b64778be41143b83ef3708eff040af Mon Sep 17 00:00:00 2001
From: Alexander Puck Neuwirth
Date: Thu, 25 Jan 2024 19:44:19 +0100
Subject: [PATCH] feat: Add support for POWHEG weights (#220)

* Add support for reading POWHEG weights.
* Add tests for POWHEG.
   - Update scikit-hep-testdata test dependency to include version with POWHEG file.
---
Review notes (this section is not part of the commit message):
 - read_lhe_with_attributes checks `index_map is None` instead of
   `not index_map`, so an empty index map is not mistaken for
   "not loaded yet" and the init block is parsed at most once.
 - _get_index_to_id_map no longer shadows the builtin id() with its
   loop variable.

 setup.py                 |  2 +-
 src/pylhe/__init__.py    | 33 +++++++++++++++++++++++
 tests/test_lhe_reader.py | 56 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d36c8a38..ff1a8e99 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
     {
         "pytest~=6.0",
         "pytest-cov>=2.5.1",
-        "scikit-hep-testdata>=0.4.0",
+        "scikit-hep-testdata>=0.4.36",
         "pydocstyle",
     }
 )
diff --git a/src/pylhe/__init__.py b/src/pylhe/__init__.py
index de11a7c2..8f92e1a3 100644
--- a/src/pylhe/__init__.py
+++ b/src/pylhe/__init__.py
@@ -248,6 +248,7 @@ def read_lhe_init(filepath):
                 initDict["procInfo"] = [LHEProcInfo.fromstring(d) for d in data[1:]]
             if element.tag == "initrwgt":
                 initDict["weightgroup"] = {}
+                index = 0
                 for child in element:
                     # Find all weightgroups
                     if child.tag == "weightgroup" and child.attrib != {}:
@@ -274,7 +275,9 @@ def read_lhe_init(filepath):
                             _temp["weights"][wg_id] = {
                                 "attrib": w.attrib,
                                 "name": w.text.strip(),
+                                "index": index,
                             }
+                            index += 1
 
                         initDict["weightgroup"][wg_type] = _temp
             if element.tag == "LesHouchesEvents":
@@ -300,11 +303,33 @@ def read_lhe(filepath):
         return
 
 
+def _get_index_to_id_map(init):
+    """
+    Produce a dictionary to map weight indices to the id of the weight.
+
+    It is used for LHE files where there is only a list of weights per event.
+    This dictionary is then used to map the list of weights to their weight id.
+    Ideally, this needs to be done only once and the dictionary can be reused.
+
+    Args:
+        init (dict): init block as returned by read_lhe_init
+
+    Returns:
+        dict: {weight index: weight id}
+    """
+    ret = {}
+    for wg in init["weightgroup"].values():
+        for weight_id, w in wg["weights"].items():
+            ret[w["index"]] = weight_id
+    return ret
+
+
 def read_lhe_with_attributes(filepath):
     """
     Iterate through file, similar to read_lhe but also set
     weights and attributes.
     """
+    index_map = None
     try:
         with _extract_fileobj(filepath) as fileobj:
             for event, element in ET.iterparse(fileobj, events=["end"]):
@@ -323,6 +348,14 @@ def read_lhe_with_attributes(filepath):
                         else:
                             eventdict["optional"].append(p.strip())
                     for sub in element:
+                        if sub.tag == "weights":
+                            if index_map is None:
+                                index_map = _get_index_to_id_map(
+                                    read_lhe_init(filepath)
+                                )
+                            for i, w in enumerate(sub.text.split()):
+                                if w:
+                                    eventdict["weights"][index_map[i]] = float(w)
                         if sub.tag == "rwgt":
                             for r in sub:
                                 if r.tag == "wgt":
diff --git a/tests/test_lhe_reader.py b/tests/test_lhe_reader.py
index 8a8cc736..c58e20ff 100644
--- a/tests/test_lhe_reader.py
+++ b/tests/test_lhe_reader.py
@@ -12,6 +12,14 @@
 
 TEST_FILE_LHE_v1 = skhep_testdata.data_path("pylhe-testfile-pr29.lhe")
 TEST_FILE_LHE_v3 = skhep_testdata.data_path("pylhe-testlhef3.lhe")
+TEST_FILE_LHE_INITRWGT_WEIGHTS = skhep_testdata.data_path(
+    "pylhe-testfile-powheg-box-v2-hvq.lhe"
+)
+TEST_FILE_LHE_RWGT_WGT = skhep_testdata.data_path("pylhe-testfile-powheg-box-v2-W.lhe")
+TEST_FILES_LHE_POWHEG = [
+    skhep_testdata.data_path("pylhe-testfile-powheg-box-v2-%s.lhe" % (proc))
+    for proc in ["Z", "W", "Zj", "trijet", "directphoton", "hvq"]
+]
 
 
 @pytest.fixture(scope="session")
@@ -135,6 +143,54 @@ def test_read_lhe_with_attributes_v3():
         assert isinstance(e, LHEEvent)
 
 
+@pytest.mark.parametrize("file", TEST_FILES_LHE_POWHEG)
+def test_read_lhe_powheg(file):
+    """
+    Test method read_lhe() on several types of LesHouchesEvents POWHEG files.
+    """
+    events = pylhe.read_lhe(file)
+
+    assert events
+    for e in events:
+        assert isinstance(e, LHEEvent)
+
+
+@pytest.mark.parametrize("file", TEST_FILES_LHE_POWHEG)
+def test_read_lhe_with_attributes_powheg(file):
+    """
+    Test method read_lhe_with_attributes() on several types of LesHouchesEvents POWHEG files.
+    """
+    events = pylhe.read_lhe_with_attributes(file)
+
+    assert events
+    for e in events:
+        assert isinstance(e, LHEEvent)
+
+
+def test_read_lhe_initrwgt_weights():
+    """
+    Test the weights from initrwgt with a weights list.
+    """
+    events = pylhe.read_lhe_with_attributes(TEST_FILE_LHE_INITRWGT_WEIGHTS)
+
+    assert events
+    for e in events:
+        assert isinstance(e, LHEEvent)
+        assert len(e.weights) > 0
+
+
+def test_read_lhe_rwgt_wgt():
+    """
+    Test the weights from rwgt with a wgt list.
+    """
+    events = pylhe.read_lhe_with_attributes(TEST_FILE_LHE_RWGT_WGT)
+
+    assert events
+    for e in events:
+        assert isinstance(e, LHEEvent)
+        assert len(e.weights) > 0
+
+
 def test_issue_102():
     """
     Test a file containing lines starting with "#aMCatNLO".