diff --git a/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h b/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h index 8236e4100f0..02793983fe2 100644 --- a/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h +++ b/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h @@ -2,12 +2,18 @@ #ifndef RECONSTRUCTEDPARTICLE_ANALYZERS_H #define RECONSTRUCTEDPARTICLE_ANALYZERS_H +// STL #include #include +// ROOT #include "TLorentzVector.h" #include "ROOT/RVec.hxx" + +// EDM4hep #include "edm4hep/ReconstructedParticleData.h" +#include "edm4hep/ReconstructedParticleCollection.h" +#include "edm4hep/MCRecoParticleAssociationCollection.h" #include "edm4hep/ParticleIDData.h" namespace FCCAnalyses{ @@ -80,6 +86,21 @@ namespace ReconstructedParticle{ }; + /** + * \brief Analyzer to select reconstructed particles associated with a + * specified PDG ID. + * + * \param pdgID Desired PDG ID. + * \param chargeConjugateAllowed Whether to allow also charge conjugate + * PDG ID. Default value false --- charge conjugate not allowed. + */ + struct selPDG { + selPDG(const int pdgID, const bool chargeConjugateAllowed = false); + const int m_pdg; + const bool m_chargeConjugateAllowed; + edm4hep::ReconstructedParticleCollection operator() ( + const edm4hep::MCRecoParticleAssociationCollection& inAssocColl); + }; /// return reconstructed particles @@ -88,6 +109,10 @@ namespace ReconstructedParticle{ /// return the transverse momenta of the input ReconstructedParticles ROOT::VecOps::RVec get_pt(ROOT::VecOps::RVec in); + /// return the transverse momenta of the input ReconstructedParticles + ROOT::VecOps::RVec + getPt(const edm4hep::ReconstructedParticleCollection& inParticles); + /// return the momenta of the input ReconstructedParticles ROOT::VecOps::RVec get_p(ROOT::VecOps::RVec in); diff --git a/analyzers/dataframe/FCCAnalyses/Track.h b/analyzers/dataframe/FCCAnalyses/Track.h index 9a54c3c1aac..92202bf75f2 100644 --- a/analyzers/dataframe/FCCAnalyses/Track.h +++ b/analyzers/dataframe/FCCAnalyses/Track.h @@ -14,9 +14,9 @@ namespace FCCAnalyses { /** * \brief Analyzer to select tracks associated with a specified PDG ID. * - * \param pdgID Desired PDG ID. - * \param chargeConjugateAllowed Whether to allow also charge conjugate - * PDG ID. Default value false --- charge cojugate not allowed. + * \param pdgID Desired PDG ID. + * \param chargeConjugateAllowed Whether to allow also charge conjugate + * PDG ID. Default value false --- charge conjugate not allowed. */ struct selPDG { selPDG(const int pdgID, const bool chargeConjugateAllowed = false); diff --git a/analyzers/dataframe/src/ReconstructedParticle.cc b/analyzers/dataframe/src/ReconstructedParticle.cc index fd34a17f234..d4598b977da 100644 --- a/analyzers/dataframe/src/ReconstructedParticle.cc +++ b/analyzers/dataframe/src/ReconstructedParticle.cc @@ -65,6 +65,33 @@ ROOT::VecOps::RVec sel_charge::operator() ( } +selPDG::selPDG(const int pdg, const bool chargeConjugateAllowed) : + m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed) {}; + +edm4hep::ReconstructedParticleCollection selPDG::operator() ( + const edm4hep::MCRecoParticleAssociationCollection& inAssocColl) { + edm4hep::ReconstructedParticleCollection result; + result.setSubsetCollection(); + + for (const auto& assoc: inAssocColl) { + const auto& particle = assoc.getSim(); + if (m_chargeConjugateAllowed) { + if (std::abs(particle.getPDG() ) == std::abs(m_pdg)) { + result.push_back(assoc.getRec()); + } + } + else { + if (particle.getPDG() == m_pdg) { + result.push_back(assoc.getRec()); + } + } + } + + return result; +} + + + resonanceBuilder::resonanceBuilder(float arg_resonance_mass) {m_resonance_mass = arg_resonance_mass;} ROOT::VecOps::RVec resonanceBuilder::operator()(ROOT::VecOps::RVec legs) { @@ -195,6 +222,20 @@ ROOT::VecOps::RVec get_pt(ROOT::VecOps::RVec +getPt(const edm4hep::ReconstructedParticleCollection& inParticles) { + ROOT::VecOps::RVec result; + + for (const auto& particle: inParticles) { + result.push_back(std::sqrt(std::pow(particle.getMomentum().x, 2) + + std::pow(particle.getMomentum().y, 2))); + } + + return result; +} + + ROOT::VecOps::RVec merge(ROOT::VecOps::RVec x, ROOT::VecOps::RVec y) { //to be keept as ROOT::VecOps::RVec std::vector result; diff --git a/e4hsource/src/EDM4hepLegacySource.cxx b/e4hsource/src/EDM4hepLegacySource.cxx index 46421436d10..6d2b18be013 100644 --- a/e4hsource/src/EDM4hepLegacySource.cxx +++ b/e4hsource/src/EDM4hepLegacySource.cxx @@ -60,9 +60,11 @@ namespace FCCAnalyses { auto legacyMetadata = infile.Get("metadata"); infile.Close(); if (!legacyMetadata) { - throw std::runtime_error( - "EDM4hepLegacySource: Provided file is missing legacy podio " - "metadata!"); + std::string errMsg = "EDM4hepLegacySource: "; + errMsg += "Provided file is missing podio metadata!\n"; + errMsg += " "; + errMsg += filePath.data(); + throw std::runtime_error(errMsg); } } @@ -73,10 +75,6 @@ namespace FCCAnalyses { // << std::endl; podio::ROOTLegacyReader podioLegacyReader; - std::vector> readers; - readers.emplace_back(std::make_unique()); - - podioLegacyReader.openFiles(m_filePathList); nEventsInFiles = podioLegacyReader.getEntries("events"); frame = podio::Frame(podioLegacyReader.readEntry("events", 0)); diff --git a/e4hsource/test/histmaker_legacy_source.py b/e4hsource/test/histmaker_legacy_source.py deleted file mode 100644 index f83b1a5dee2..00000000000 --- a/e4hsource/test/histmaker_legacy_source.py +++ /dev/null @@ -1,57 +0,0 @@ -# list of processes (mandatory) -processList = { - 'HNL_50_all_REC_EDM4Hep': {'fraction': 1, - 'crossSection': 2.29*10**(-8)}, -} - -# Link to the dictonary that contains all the cross section information -# etc... (mandatory) -procDict = "FCCee_procDict_winter2023_IDEA.json" -procDictAdd = { - "HNL_50_all_REC_EDM4Hep": {"numberOfEvents": 50000, - "sumOfWeights": 50000, - "crossSection": 2.29*10**(-8), - "kfactor": 1.0, - "matchingEfficiency": 1.0}, -} - -# Define the input dir (optional) -inputDir = "/eos/home-j/jandrea/SampleFCCee_HNL/_HNL_50_RECO_EDM4Hep/merged" - -# Output directory, default is local running directory -outputDir = "." - -# Ncpus, default is 4, -1 uses all cores available -nCPUS = -1 - -# scale the histograms with the cross-section and integrated luminosity -doScale = True -intLumi = 150000000 # 150 /ab - -# define some binning for various histograms -bins_p_el = (2000, 0, 200) # 100 MeV bins -bins_d0 = (20, 0, 100) -bins_n_states = (30, 0, 30) - -# build_graph function that contains the analysis logic, cuts and histograms -# (mandatory) -def build_graph(df, dataset): - - results = [] - df = df.Define("weight", "1.0") - weightsum = df.Sum("weight") - - df = df.Define("Electron_tracks", - "FCCAnalyses::Track::selPDG(11)(SiTracksMCTruthLink)") - - df = df.Define("Electron_nTrackStates", - "FCCAnalyses::Track::getNstates(Electron_tracks)") - df = df.Define("Electron_track_d0", - "FCCAnalyses::Track::getD0(Electron_tracks)") - - results.append(df.Histo1D(("electron_track_d0", "", *bins_d0), - "Electron_track_d0")) - results.append(df.Histo1D(("electron_nTractStates", "", *bins_n_states), - "Electron_nTrackStates")) - - return results, weightsum diff --git a/e4hsource/test/histmaker_source.py b/e4hsource/test/histmaker_source.py new file mode 100644 index 00000000000..476e1017217 --- /dev/null +++ b/e4hsource/test/histmaker_source.py @@ -0,0 +1,49 @@ +# list of processes (mandatory) +processList = { + 'p8_ee_WW_ecm240': {'output': 'p8_ee_WW_ecm240_out'} +} + +# Production tag when running over EDM4Hep centrally produced events, this +# points to the yaml files for getting sample statistics (mandatory) +prodTag = "FCCee/winter2023/IDEA/" + +# Link to the dictonary that contains all the cross section informations +# etc... (mandatory) +procDict = "FCCee_procDict_winter2023_IDEA.json" + +# Optional: output directory, default is local running directory +outputDir = "." + +# Ncpus, default is 4, -1 uses all cores available +nCPUS = -1 + +# scale the histograms with the cross-section and integrated luminosity +# define some binning for various histograms +bins_pt = (20, 0, 200) + +# How to read input files +useDataSource = True + +testFile = '/eos/experiment/fcc/ee/generation/DelphesEvents/winter2023/' \ + 'IDEA/p8_ee_WW_ecm240/events_192112516.root' + +# build_graph function that contains the analysis logic, cuts and histograms +# (mandatory) +def build_graph(df, dataset): + + results = [] + df = df.Define("weight", "1.0") + weightsum = df.Sum("weight") + + df = df.Define( + "electron_truth", + "FCCAnalyses::ReconstructedParticle::selPDG(11)(MCRecoAssociations)") + + df = df.Define( + "electron_truth_pt", + "FCCAnalyses::ReconstructedParticle::getPt(electron_truth)") + + results.append(df.Histo1D(("h_electron_truth_pt", "", *bins_pt), + "electron_truth_pt")) + + return results, weightsum diff --git a/python/FCCAnalysisRun.py b/python/FCCAnalysisRun.py index 379794516b8..0f370bc067f 100644 --- a/python/FCCAnalysisRun.py +++ b/python/FCCAnalysisRun.py @@ -184,7 +184,7 @@ def initialize(args, rdfModule, analysisFile): #__________________________________________________________ def runRDF(rdfModule, inputlist, outFile, nevt, args): - if args.use_source: + if args.use_data_source: print('----> Info: Loading events through EDM4hep RDataSource...') ROOT.gSystem.Load("libe4hsource") if ROOT.loadEDM4hepSource(): @@ -356,16 +356,25 @@ def apply_filepath_rewrites(filepath): return filepath -#__________________________________________________________ +# __________________________________________________________ def runLocal(rdfModule, infile_list, args): # Create list of files to be processed - print ('----> Info: Creating dataframe object from files: ', ) + print('----> Info: Creating dataframe object from files: ', ) + + # Check if to use RDataSource to load the events + use_data_source = getElement(rdfModule, "useDataSource") + if use_data_source: + args.use_data_source = True + use_legacy_source = getElement(rdfModule, "useLegacyDataSource") + if use_legacy_source: + args.use_legacy_source = True + file_list = ROOT.vector('string')() nevents_orig = 0 # Amount of events processed in previous stage (= 0 if it is the first stage) nevents_local = 0 # The amount of events in the input file(s) for filepath in infile_list: - if not args.use_source: + if not args.use_data_source and not args.use_legacy_source: filepath = apply_filepath_rewrites(filepath) file_list.push_back(filepath) @@ -888,17 +897,42 @@ def runHistmaker(args, rdfModule, analysisFile): evtcounts = [] # event count of the input file eventsProcessedDict = {} # number of events processed per process, in a potential previous step + # Check whether to load events through datasource + use_legacy_source = getElement(rdfModule, 'useLegacyDataSource') + if use_legacy_source: + args.use_legacy_source = True + use_data_source = getElement(rdfModule, 'useDataSource') + if use_data_source: + args.use_data_source = True + + if args.test: + testFile = getElement(rdfModule, "testFile") + if testFile: + print('----> Info: Will use the following test file:') + print(' ' + testFile) + for process in processList: - fileList, eventList = getProcessInfo(process, getElement(rdfModule,"prodTag"), getElement(rdfModule, "inputDir")) + prod_tag = getElement(rdfModule, 'prodTag') + input_dir = getElement(rdfModule, 'inputDir') + fileList, eventList = getProcessInfo(process, prod_tag, input_dir) if len(fileList) == 0: print('----> ERROR: No files to process. Exit') sys.exit(3) + if args.test: + testFile = getElement(rdfModule, "testFile") + if testFile: + print('----> Info: Will use 100 events from the following ' + 'test file:') + print(' ' + testFile) + fileList = [testFile] + eventList = [100] + # get the number of events processed, in a potential previous step fileListRoot = ROOT.vector('string')() nevents_meta = 0 # amount of events processed in previous stage (= 0 if it is the first stage) for fileName in fileList: - if not (args.use_source or args.use_legacy_source): + if not args.use_data_source and not args.use_legacy_source: fileName = apply_filepath_rewrites(fileName) fileListRoot.push_back(fileName) tf=ROOT.TFile.Open(str(fileName),"READ") @@ -925,7 +959,7 @@ def runHistmaker(args, rdfModule, analysisFile): if fraction<1:fileList = getsubfileList(fileList, eventList, fraction) print ('----> Info: Add process {} with fraction={}, nfiles={}, output={}, chunks={}'.format(process, fraction, len(fileList), output, chunks)) - if args.use_source: + if args.use_data_source: print('----> Info: Loading events through EDM4hep RDataSource...') ROOT.gSystem.Load("libe4hsource") if ROOT.loadEDM4hepSource(): diff --git a/python/Parsers.py b/python/Parsers.py index 9a282bd1797..8518dd1b1c8 100644 --- a/python/Parsers.py +++ b/python/Parsers.py @@ -71,8 +71,9 @@ def setup_run_parser(parser): help='specify max number of events to process') parser.add_argument('--test', action='store_true', default=False, help='run over the test file') - parser.add_argument('--use-source', action='store_true', default=False, - help='use EDM4hep RDataSource to construct dataframe') + parser.add_argument( + '--use-data-source', action='store_true', default=False, + help='use EDM4hep RDataSource to construct dataframe') parser.add_argument( '--use-legacy-source', action='store_true', default=False, help='use EDM4hep Legacy RDataSource to construct dataframe') diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9cae25a5d10..40cad99d44c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_integration_test("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py") add_integration_test("examples/FCCee/flavour/Bc2TauNu/analysis_B2TauNu_truth.py") add_integration_test("examples/FCCee/test/jet_constituents.py") add_integration_test("examples/FCCee/vertex_lcfiplus/analysis_V0.py") +add_integration_test("e4hsource/test/histmaker_source.py") # TODO: make this test run in the spack build environment #add_generic_test(build_new_case_study "tests/build_new_case_study.sh")