forked from ucsd-hep-ex/L1JetTag
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hist_saver.py
117 lines (108 loc) · 4.83 KB
/
hist_saver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import h5py
import awkward as ak
import uproot
import argparse
import numpy as np
import pathlib
import matplotlib.pyplot as plt
import mplhep as hep
# Command-line interface.
# Every input path is optional; an input that is not supplied is left as None
# and is skipped by the `str` checks further down. Only --output is mandatory.
parser = argparse.ArgumentParser()
parser.add_argument('--QCD-part', action='store', type=str, required=False, help='file path for QCD file particle level features')
# No default here: the previous integer default (100) was not a path and only
# went unnoticed because non-string values are ignored downstream.
parser.add_argument('--stop-part', action='store', type=str, required=False, help='file path for stop file particle level features')
parser.add_argument('--QCD-bkg', action='store', type=str, required=False, help='file path for QCD background')
parser.add_argument('--stop-samp', action='store', type=str, required=False, help='file path for stop file sample')
parser.add_argument('--stop-bkg', action='store', type=str, required=False, help='file path for stop file background')
parser.add_argument('--output', action='store', type=str, required=True, help='output path')
args = parser.parse_args()
# The --output value is used as a literal prefix: 'graph.root' is appended
# directly to it, so a directory has to be passed with its trailing separator.
output_path = f"{args.output}graph.root"
# Handle to the writable ROOT file; the trees below are created and filled
# through this object.
create_file = uproot.writing.writable.recreate(output_path)
# Declare the branches of the 'particle_level' tree.
# For every particle-level input that was supplied on the command line, six
# variable-length float64 branches named '<sample>_<feature>' are registered
# and the sample label is queued in file_list for the fill step.
mktree_dict = {}
file_list = []
for sample, h5_path in (("QCD", args.QCD_part), ("stop", args.stop_part)):
    if not isinstance(h5_path, str):
        # Input not provided: no branches and no fill for this sample.
        continue
    mktree_dict.update(
        {
            f"{sample}_{feature}": "var * float64"
            for feature in ("dz", "dx", "dy", "pt", "eta", "phi")
        }
    )
    file_list.append(sample)
create_file.mktree('particle_level', mktree_dict)
# Fill the 'particle_level' tree.
# For each queued sample and each feature, a 100-bin histogram is computed and
# scaled to unit area. Each branch receives two entries: the scaled bin
# contents (100 values) followed by the bin edges (101 values).
uproot_dict = {}
# Sample label -> HDF5 path given on the command line.
particle_inputs = {"QCD": args.QCD_part, "stop": args.stop_part}
# (branch suffix, first column of that feature in the dataset).
particle_features = (('dz', 8), ('dx', 9), ('dy', 10), ('pt', 11), ('eta', 12), ('phi', 13))
for kind in file_list:
    # Open each file once per sample; all six features come from the same dataset.
    with h5py.File(particle_inputs[kind], 'r') as h5f:
        training_data = h5f["Training Data"]
        for feat_name, first_col in particle_features:
            # Every 14th column starting at first_col.
            # NOTE(review): presumably 14 features per particle laid out
            # back-to-back along axis 1 -- confirm against the file producer.
            feat_array = training_data[:, first_col::14]
            n, bins = np.histogram(feat_array, bins=100)
            bin_width = bins[1] - bins[0]
            # Scale so that sum(n * bin_width) == 1.
            weights = 1/np.sum(n*bin_width)
            n = n*weights
            uproot_dict[kind + "_" + feat_name] = ak.Array([n, bins])
# All branches are written in a single call.
create_file["particle_level"].extend(uproot_dict)
# Declare the branches of the 'jet_level' tree.
# For every jet-level input that was supplied on the command line, four
# variable-length float64 branches named '<sample>_<feature>' are registered
# and the sample label is queued in file_list for the fill step.
mktree_dict = {}
file_list = []
jet_level_inputs = (
    ("bkg_QCD", args.QCD_bkg),
    ("bkg_stop", args.stop_bkg),
    ("samp_stop", args.stop_samp),
)
for sample, h5_path in jet_level_inputs:
    if not isinstance(h5_path, str):
        # Input not provided: no branches and no fill for this sample.
        continue
    mktree_dict.update(
        {
            f"{sample}_{feature}": "var * float64"
            for feature in ("pt", "eta", "phi", "m")
        }
    )
    file_list.append(sample)
create_file.mktree('jet_level', mktree_dict)
# Fill the 'jet_level' tree.
# For each queued sample and each jet feature, a 100-bin histogram is computed
# and scaled to unit area. Each branch receives two entries: the scaled bin
# contents (100 values) followed by the bin edges (101 values).
uproot_dict = {}
# Sample label -> (HDF5 path given on the command line, dataset name in that file).
jet_inputs = {
    "bkg_QCD": (args.QCD_bkg, "Jet Data"),
    "bkg_stop": (args.stop_bkg, "Data"),
    "samp_stop": (args.stop_samp, "Data"),
}
for kind in file_list:
    h5file, array_name = jet_inputs[kind]
    # Open each file once per sample; all four features come from the same dataset.
    with h5py.File(h5file, 'r') as h5f:
        jet_data = h5f[array_name]
        # Column idx of the dataset is labelled with the feature at position idx.
        # NOTE(review): assumes columns 0-3 really hold pt, eta, phi, m -- confirm.
        for idx, feat in enumerate(['pt', 'eta', 'phi', 'm']):
            feat_array = jet_data[:, idx]
            n, bins = np.histogram(feat_array, bins=100)
            bin_width = bins[1] - bins[0]
            # Scale so that sum(n * bin_width) == 1.
            weights = 1/np.sum(n*bin_width)
            n = n*weights
            uproot_dict[kind + "_" + feat] = ak.Array([n, bins])
# All branches are written in a single call.
create_file["jet_level"].extend(uproot_dict)