Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RooFit improvements, processer clean up and small bug fixing #971

Merged
merged 15 commits into from
Feb 7, 2025
39 changes: 33 additions & 6 deletions machine_learning_hep/analysis/analyzer_jets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from ROOT import TF1, TCanvas, TFile, gStyle

from machine_learning_hep.analysis.analyzer import Analyzer
from machine_learning_hep.fitting.roofitter import RooFitter
from machine_learning_hep.fitting.roofitter import RooFitter, calc_signif
from machine_learning_hep.fitting.roofitter import create_text_info, add_text_info_fit, add_text_info_perf
from machine_learning_hep.utilities import folding, make_message_notfound
from machine_learning_hep.utils.hist import (bin_array, create_hist, norm_response, fold_hist,
fill_hist_fast, get_axis, get_dim, get_bin_limits,
Expand Down Expand Up @@ -71,6 +72,8 @@ def __init__(self, datap, case, typean, period):
self.n_fileresp = os.path.join(self.d_resultsallpmc_proc, self.n_fileresp)
file_result_name = datap["files_names"]["resultfilename"]
self.n_fileresult = os.path.join(self.d_resultsallpdata, file_result_name)
self.p_pdfnames = datap["analysis"][self.typean]['pdf_names']
self.p_param_names = datap["analysis"][self.typean]['param_names']

self.observables = {
'qa': ['zg', 'rg', 'nsd', 'zpar', 'dr', 'lntheta', 'lnkt', 'lntheta-lnkt'],
Expand Down Expand Up @@ -341,17 +344,41 @@ def _correct_efficiency(self, hist, ipt):


#region fitting
def _roofit_mass(self, hist, ipt, fitcfg, roows = None, filename = None):
def _roofit_mass(self, level, hist, ipt, pdfnames, param_names, fitcfg, roows = None, filename = None):
if fitcfg is None:
return None, None
res, ws, frame = self.fitter.fit_mass_new(hist, fitcfg, roows, True)
res, ws, frame, residual_frame = self.fitter.fit_mass_new(hist, pdfnames, fitcfg, level, roows, True)
frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
c = TCanvas()

textInfoRight = create_text_info(0.62, 0.68, 1.0, 0.89)
add_text_info_fit(textInfoRight, frame, ws, param_names)

textInfoLeft = create_text_info(0.12, 0.68, 0.6, 0.89)
if level == "data":
mean_sgn = ws.var(self.p_param_names["gauss_mean"])
sigma_sgn = ws.var(self.p_param_names["gauss_sigma"])
(sig, sig_err, bkg, bkg_err,
signif, signif_err, s_over_b, s_over_b_err
) = calc_signif(ws, res, pdfnames, param_names, mean_sgn, sigma_sgn)

add_text_info_perf(textInfoLeft, sig, sig_err, bkg, bkg_err, s_over_b, s_over_b_err, signif, signif_err)

DelloStritto marked this conversation as resolved.
Show resolved Hide resolved
frame.Draw()
textInfoRight.Draw()
textInfoLeft.Draw()
if res.status() != 0:
self.logger.warning('Invalid fit result for %s', hist.GetName())
filename = filename.replace('.png', '_invalid.png')
self._save_canvas(c, filename)

if level == "data":
residual_frame.SetTitle(f'inv. mass for p_{{T}} {self.bins_candpt[ipt]} - {self.bins_candpt[ipt+1]} GeV/c')
cres = TCanvas()
residual_frame.Draw()
filename = filename.replace('.png', '_residual.png')
self._save_canvas(cres, filename)

return res, ws


Expand Down Expand Up @@ -493,7 +520,7 @@ def fit(self):
if var := roows.var(par):
var.setConstant(True)
roo_res, roo_ws = self._roofit_mass(
h_invmass, ipt, fitcfg, roows,
level, h_invmass, ipt, self.p_pdfnames, self.p_param_names, fitcfg, roows,
f'roofit/h_mass_fitted{jetptlabel}_{string_range_pthf(range_pthf)}_{level}.png')
if roo_res.status() != 0:
self.logger.error('RooFit failed for %s iptjet %s ipt %d', level, iptjet, ipt)
Expand All @@ -514,8 +541,8 @@ def fit(self):
self.roo_ws_ptjet[level][jptjet][ipt] = roo_ws.Clone()
# TODO: take parameter names from DB
if level in ('data', 'mc'):
varname_mean = fitcfg.get('var_mean', 'mean')
varname_sigma = fitcfg.get('var_sigma', 'sigma_g1')
varname_mean = fitcfg.get('var_mean', self.p_param_names["gauss_mean"])
varname_sigma = fitcfg.get('var_sigma', self.p_param_names["gauss_sigma"])
self.fit_mean[level][ipt] = roo_ws.var(varname_mean).getValV()
self.fit_sigma[level][ipt] = roo_ws.var(varname_sigma).getValV()
varname_m = fitcfg.get('var', 'm')
Expand Down
Loading
Loading