Skip to content

Commit

Permalink
adding coffea example
Browse files Browse the repository at this point in the history
  • Loading branch information
Eduardo Ploerer committed Jun 9, 2022
1 parent aa8bb56 commit c1cb05b
Show file tree
Hide file tree
Showing 4 changed files with 576 additions and 0 deletions.
136 changes: 136 additions & 0 deletions examples/FCCee/coffea/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import sys
import ROOT

print ("Load cxx analyzers ... ",)
ROOT.gSystem.Load("libedm4hep")
ROOT.gSystem.Load("libpodio")
ROOT.gSystem.Load("libFCCAnalyses")
ROOT.gErrorIgnoreLevel = ROOT.kFatal
_edm = ROOT.edm4hep.ReconstructedParticleData()
_pod = ROOT.podio.ObjectID()
_fcc = ROOT.dummyLoader

print ('edm4hep ',_edm)
print ('podio ',_pod)
print ('fccana ',_fcc)

class analysis():

#__________________________________________________________
def __init__(self, inputlist, outname, ncpu):
self.outname = outname
if ".root" not in outname:
self.outname+=".root"

ROOT.ROOT.EnableImplicitMT(ncpu)

self.df = ROOT.RDataFrame("events", inputlist)
print (" done")
#__________________________________________________________
def run(self):
df2 = (self.df

.Alias("Particle0", "Particle#0.index")
#.Alias("Particle_index", "Particle.index")
#.Alias("Particle1", "Particle#1.index")
.Alias("Jet3", "Jet#3.index") #alias for dealing with # in python


.Define("MC_px", "MCParticle::get_px(Particle)")
.Define("MC_py", "MCParticle::get_py(Particle)")
.Define("MC_pz", "MCParticle::get_pz(Particle)")
.Define("MC_p", "MCParticle::get_p(Particle)")
.Define("MC_e", "MCParticle::get_e(Particle)")
.Define("MC_theta", "MCParticle::get_theta(Particle)")
.Define("MC_pdg", "MCParticle::get_pdg(Particle)")
.Define("MC_status", "MCParticle::get_genStatus(Particle)")

#selecting only final particles (status=1)
.Define("MC_final", "MCParticle::sel_genStatus(1)(Particle)")

#momenta etc of final particles
.Define("MC_px_f", "MCParticle::get_px(MC_final)")
.Define("MC_py_f", "MCParticle::get_py(MC_final)")
.Define("MC_pz_f", "MCParticle::get_pz(MC_final)")
.Define("MC_e_f", "MCParticle::get_e(MC_final)")
.Define("MC_theta_f", "MCParticle::get_theta(MC_final)")
.Define("MC_pdg_f", "MCParticle::get_pdg(MC_final)")


#EE-KT ALGORITHM

#build psedo-jets with the MC final particles (status = 0)
.Define("pseudo_jets", "JetClusteringUtils::set_pseudoJets(MC_px_f, MC_py_f, MC_pz_f, MC_e_f)")

#run jet clustering with final state MC particles. ee_kt_algorithm, exclusive clustering, exactly 2 jets, E-scheme
.Define("FCCAnalysesJets_ee_kt", "JetClustering::clustering_ee_kt(2, 2, 1, 0)(pseudo_jets)")

#get the jets out of the structure
.Define("jets_ee_kt", "JetClusteringUtils::get_pseudoJets(FCCAnalysesJets_ee_kt)")

#get the jet constituents out of the structure
.Define("jetconstituents_ee_kt", "JetClusteringUtils::get_constituents(FCCAnalysesJets_ee_kt)")

#get some jet variables
.Define("jets_ee_kt_e", "JetClusteringUtils::get_e(jets_ee_kt)")
.Define("jets_ee_kt_px", "JetClusteringUtils::get_px(jets_ee_kt)")
.Define("jets_ee_kt_py", "JetClusteringUtils::get_py(jets_ee_kt)")
.Define("jets_ee_kt_pz", "JetClusteringUtils::get_pz(jets_ee_kt)")
.Define("jets_ee_kt_flavour", "JetTaggingUtils::get_flavour(jets_ee_kt, Particle)")
)




# select branches for output file
branchList = ROOT.vector('string')()
for branchName in [
"MC_px",
"MC_py",
"MC_pz",
"MC_p",
"MC_e",
"MC_theta",
"MC_pdg",
"MC_status",

"MC_px_f",
"MC_py_f",
"MC_pz_f",
"MC_e_f",
"MC_theta_f",
"MC_pdg_f",

"jets_ee_kt_e",
"jets_ee_kt_px",
"jets_ee_kt_py",
"jets_ee_kt_pz",
"jets_ee_kt_flavour",
"jetconstituents_ee_kt",
]:
branchList.push_back(branchName)
df2.Snapshot("events", self.outname, branchList)

# example call for standalone file
# python examples/FCCee/higgs/mH-recoil/mumu/analysis.py /eos/experiment/fcc/ee/generation/DelphesEvents/fcc_tmp/p8_ee_ZH_ecm240/events_058720051.root
if __name__ == "__main__":

if len(sys.argv)==1:
print ("usage:")
print ("python ",sys.argv[0]," file.root")
sys.exit(3)
infile = sys.argv[1]
outDir = sys.argv[0].replace(sys.argv[0].split('/')[0],'outputs').replace('analysis.py','')+'/'
import os
os.system("mkdir -p {}".format(outDir))
outfile = outDir+infile.split('/')[-1]
ncpus = 0
print ('outfile ',outfile)
analysis = analysis(infile, outfile, ncpus)
analysis.run()

tf = ROOT.TFile(infile)
entries = tf.events.GetEntries()
p = ROOT.TParameter(int)( "eventsProcessed", entries)
outf=ROOT.TFile(outfile,"UPDATE")
p.Write()
176 changes: 176 additions & 0 deletions examples/FCCee/coffea/finalSel_coffea.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import uproot
import awkward as ak
import h5py as h5
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
from coffea import hist, processor
from coffea.nanoevents.methods import vector


ak.behavior.update(vector.behavior)



# As detailed in the coffea docs, the core of the analysis script is the Processor class defined below. The Processor.process function will be called on every batch being processed.

class MyProcessor(processor.ProcessorABC):
def __init__(self):


# We begin by defining the accumulator which will hold all quantities we intend to output. In this case it will be only the Z invariant mass and the flavour of the quarks.

Z_accumulator = {
'Z_mass': processor.column_accumulator(np.zeros([0])),
'pid': processor.column_accumulator(np.zeros([0])),
}
self._accumulator = processor.dict_accumulator( Z_accumulator )

@property
def accumulator(self):
return self._accumulator



def process(self, events):

output = self.accumulator.identity()


# Here, the jets are defined LorentzVector classes (meaning several attributes and operations, such as jets.eta, will be accessible)

jets = ak.zip({
"t": events.jets_ee_kt_e,
"x": events.jets_ee_kt_px,
"y": events.jets_ee_kt_py,
"z": events.jets_ee_kt_pz,
"partonFlavour": events.jets_ee_kt_flavour,
}, with_name="LorentzVector")


# The (stable) gen level particles are defined similarly, but with their corresponding arrays

GenPartsf = ak.zip({
"t": events.MC_e_f,
"x": events.MC_px_f,
"y": events.MC_py_f,
"z": events.MC_pz_f,
"pdg": events.MC_pdg_f,
}, with_name="LorentzVector")



# Here the flavour mask is defined by requiring both the quark and antiquark initiated jets to have the same flavour (which is non-negative). Unmatched or incorrectly matched jets result in a flavour of 0.

up_mask = (np.abs(jets.partonFlavour[:,0])==2)&(np.abs(jets.partonFlavour[:,1])==2)
down_mask = (np.abs(jets.partonFlavour[:,0])==1)&(np.abs(jets.partonFlavour[:,1])==1)
strange_mask = (np.abs(jets.partonFlavour[:,0])==3)&(np.abs(jets.partonFlavour[:,1])==3)
charm_mask = (np.abs(jets.partonFlavour[:,0])==4)&(np.abs(jets.partonFlavour[:,1])==4)
bottom_mask = (np.abs(jets.partonFlavour[:,0])==5)&(np.abs(jets.partonFlavour[:,1])==5)

pid = 2*up_mask+down_mask+3*strange_mask+4*charm_mask+5*bottom_mask



# jets[:,::2] selects only the leading jet of an event, while jets[:,1::2] selects the subleading. In this way the Z candidates are computed, having the same LorentzVector structure as the jets from which they are computed.

Z_cand = jets[:,::2]+jets[:,1::2]




#----------------------------------------------------------------------------------------------------------------------------------------------------------------------
#########################
#ASIDE: Jet constituents#
#########################

# Though not relevant in this simple example, in most cases one will want to have access to the jet constituents clustered into the first and second jets. This is possible by passing events.jetconstituents_ee_kt as a mask to the (stable) gen level particles. The events.jetconstituents_ee_kt array contains the indices of each (stable) gen level particle clustered into a given jet, and can thus be used to define a new collection consisting only the the particles in a given jet.

# For instance, the jet constituents of the leading jet are defined as

firstjetGenPartsf = ak.copy(GenPartsf[events.jetconstituents_ee_kt[:,0]])

# While the jet consituents of the sub-leading jet are defined as

secondjetGenPartsf = ak.copy(GenPartsf[events.jetconstituents_ee_kt[:,1]])
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Here the Z candidates with undefined flavours are removed. Note that this syntax is general and could be used for any cuts e.g. kinematic
# Requiring a jet mass > 80 GeV would be
# Z_cand = Z_cand[Z_cand.mass>80]

Z_cand = Z_cand[pid!=0]
pid = pid[pid!=0]


output['Z_mass']+= processor.column_accumulator(np.asarray(ak.flatten(Z_cand.mass)))
output['pid']+= processor.column_accumulator(np.asarray(pid))

return output

def postprocess(self, accumulator):
return accumulator

filename = "outputs/FCCee/coffea/p8_ee_Zuds_ecm91.root"
fileset = {'Zqq': [filename]}

# The processor run_uproot_job loads the events via uproot and executes process function on the defined batches. The 'maxchunks' argument defined the maximum number of chunks that will be processed, while the 'chunksize' defines the number of events per chunk. The 'workers' argument defines how many parallel processes are run.

output = processor.run_uproot_job(fileset,
treename='events',
processor_instance=MyProcessor(),
executor=processor.futures_executor,
executor_args={'schema': None, 'workers':4,},
maxchunks =20,
chunksize = 5000,
)


##########
#PLOTTING#
##########

Z_mass = output['Z_mass'].value
pid = output['pid'].value

# Here the Z invariant mass is histogrammed using coffea histograms. Subsequently, the invariant mass and quark flavour is saved as an .h5 file.

Z_cand = hist.Hist(
"Events",
hist.Cat("flavour", "Quark flavour"),
hist.Bin("m", r"$m_{q \bar{q}}$ [GeV]", 30, 90.75, 91.65),
)

for i_f, flav in enumerate(["down", "up", "strange"]):
Z_cand.fill(
flavour=flav,
m=Z_mass[(pid==(i_f+1))],
)

ax = hist.plot1d(
Z_cand,
overlay="flavour",
stack=True,
fill_opts={'alpha': .5, 'edgecolor': (0,0,0,0.3)}
)

ax.get_legend().shadow = True
ax.set_title(r'$Z \rightarrow q\bar{q}$ invariant mass')

plt.savefig('examples/FCCee/coffea/Zpeak.pdf')

h5_file = h5.File('examples/FCCee/coffea/Zpeak.h5', 'w')
h5_file.create_dataset("Z_mass", data=Z_mass)
h5_file.create_dataset("pid", data=pid)












Loading

0 comments on commit c1cb05b

Please sign in to comment.