From 835259239ff8ae08b6a798ff7d360813ff80be14 Mon Sep 17 00:00:00 2001 From: Luigi Dello Stritto Date: Tue, 28 Nov 2023 16:50:07 +0100 Subject: [PATCH] Fixing multiplicity analysis workflow --- .../analysis/analyzerdhadrons_mult.py | 20 +- machine_learning_hep/bitwise.py | 10 +- .../database_ml_parameters_LcToPKPi.yml | 26 +- .../database_ml_parameters_LcToPKPi_mult.yml | 387 ++++++++++++++++++ machine_learning_hep/processer.py | 1 - machine_learning_hep/processerdhadrons.py | 4 +- .../processerdhadrons_mult.py | 23 +- 7 files changed, 430 insertions(+), 41 deletions(-) create mode 100644 machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_mult.yml diff --git a/machine_learning_hep/analysis/analyzerdhadrons_mult.py b/machine_learning_hep/analysis/analyzerdhadrons_mult.py index 2e5ed366a7..a0dc073baa 100644 --- a/machine_learning_hep/analysis/analyzerdhadrons_mult.py +++ b/machine_learning_hep/analysis/analyzerdhadrons_mult.py @@ -62,10 +62,13 @@ def __init__(self, datap, case, typean, period): [None for _ in range(len(self.lvar2_binmin))]) self.inel0_var = datap["analysis"][self.typean].get("inel0_var", "n_tracklets") - self.d_resultsallpmc = datap["analysis"][typean]["mc"]["results"][period] \ - if period is not None else datap["analysis"][typean]["mc"]["resultsallp"] - self.d_resultsallpdata = datap["analysis"][typean]["data"]["results"][period] \ - if period is not None else datap["analysis"][typean]["data"]["resultsallp"] + self.dp = datap["analysis"][typean] + self.d_prefix_mc = self.dp["mc"].get("prefix_dir_res", "") + self.d_prefix_data = self.dp["data"].get("prefix_dir_res", "") + self.d_resultsallpmc = self.d_prefix_mc + self.dp["mc"]["results"][period] \ + if period is not None else self.d_prefix_mc + self.dp["mc"]["resultsallp"] + self.d_resultsallpdata = self.d_prefix_data + self.dp["data"]["results"][period] \ + if period is not None else self.d_prefix_data + self.dp["data"]["resultsallp"] self.p_corrmb_typean = 
datap["analysis"][self.typean]["corresp_mb_typean"] if self.p_corrmb_typean is not None: @@ -116,8 +119,7 @@ def __init__(self, datap, case, typean, period): if not isinstance(self.p_includesecpeaks[0], list): self.p_inculdesecpeaks = [self.p_includesecpeaks for _ in range(self.p_nbin2)] - self.p_masssecpeak = datap["analysis"][self.typean]["masssecpeak"] \ - if self.p_includesecpeaks else None + self.p_masssecpeak = datap["analysis"][self.typean].get("masssecpeak", None) self.p_fix_masssecpeaks = datap["analysis"][self.typean].get("fix_masssecpeak", None) if self.p_fix_masssecpeaks is None: @@ -127,10 +129,8 @@ def __init__(self, datap, case, typean, period): if not isinstance(self.p_fix_masssecpeaks[0], list): self.p_fix_masssecpeaks = [self.p_fix_masssecpeaks for _ in range(self.p_nbin2)] - self.p_widthsecpeak = datap["analysis"][self.typean]["widthsecpeak"] \ - if self.p_includesecpeaks else None - self.p_fix_widthsecpeak = datap["analysis"][self.typean]["fix_widthsecpeak"] \ - if self.p_includesecpeaks else None + self.p_widthsecpeak = datap["analysis"][self.typean].get("widthsecpeak", None) + self.p_fix_widthsecpeak = datap["analysis"][self.typean].get("fix_widthsecpeak", None) self.p_fixedmean = datap["analysis"][self.typean]["FixedMean"] self.p_use_user_gauss_sigma = datap["analysis"][self.typean]["SetInitialGaussianSigma"] self.p_max_perc_sigma_diff = datap["analysis"][self.typean]["MaxPercSigmaDeviation"] diff --git a/machine_learning_hep/bitwise.py b/machine_learning_hep/bitwise.py index 1b3001e884..42c6a98d23 100644 --- a/machine_learning_hep/bitwise.py +++ b/machine_learning_hep/bitwise.py @@ -32,18 +32,18 @@ def tag_bit_df(dfin, namebitmap, activatedbit): bitson = activatedbit[0] bitsoff = activatedbit[1] array_cand_type = dfin.loc[:, namebitmap].values.astype("int") - res_on = pd.Series([True]*len(array_cand_type)) - res_off = pd.Series([True]*len(array_cand_type)) - res = pd.Series(dtype = 'int') + res_on = pd.Series([True]*len(array_cand_type), 
dtype=bool) + res_off = pd.Series([True]*len(array_cand_type), dtype=bool) + res = pd.Series(dtype=bool) if bitson: mask = reduce(operator.or_, ((1 << bit) for bit in bitson), 0) bitmapon = selectbiton(array_cand_type, mask) - res_on = pd.Series(bitmapon) + res_on = pd.Series(bitmapon, dtype=bool) if bitsoff: mask = reduce(operator.or_, ((1 << bit) for bit in bitsoff), 0) bitmapoff = selectbitoff(array_cand_type, mask) - res_off = pd.Series(bitmapoff) + res_off = pd.Series(bitmapoff, dtype=bool) res = res_on & res_off return res diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi.yml index f032694ade..4b7fab844b 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi.yml @@ -18,7 +18,6 @@ LcpKpi: doml: true mass: 2.286 sel_reco_unp: "fPt>0" - sel_reco_singletrac_unp : null sel_gen_unp: "fPt>0" sel_cen_unp: null sel_good_evt_unp: "fIsEventReject == 0" @@ -60,8 +59,8 @@ LcpKpi: var_jet_match: [df, fIndexHfCand2Prong] var_jetsub_match: [df, fIndexD0ChargedJets] var_evt: - data: [fIndexCollisions, fIsEventReject, fNumContrib] - mc: [fIndexCollisions, fIndexMcCollisions, fIsEventReject, fNumContrib] + data: [fIndexCollisions, fIsEventReject, fNumContrib, fMultZeqNTracksPV, fMultZeqFT0A, fMultZeqFT0C, fMultFT0M, fMultZeqFV0A] + mc: [fIndexCollisions, fIndexMcCollisions, fIsEventReject, fNumContrib, fMultZeqNTracksPV, fMultZeqFT0A, fMultZeqFT0C, fMultFT0M, fMultZeqFV0A] var_gen: [fIndexMcCollisions, fPt, fY, fFlagMc, fOriginMcGen] var_evt_match: [df, fIndexCollisions] var_evt_match_mc: [df, fIndexMcCollisions] @@ -202,7 +201,7 @@ LcpKpi: chunksizeskim: [100] #list of periods fracmerge : [1.0] #list of periods seedmerge: [12] #list of periods - period: [test] #list of periods + period: [LHC22b1b] #list of periods select_period: [1] prefix_dir: /data2/MLhep/ 
unmerged_tree_dir: [sim/alice/cern.ch/user/a/alihyperloop/jobs/0024] #list of periods @@ -286,7 +285,7 @@ LcpKpi: Run3analysis: proc_type: Dhadrons - useperiod: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] + useperiod: [1] plotbin: [1] usesinglebineff: 0 sel_binmin2: [1,2,4,6,8,12] #list of nbins @@ -295,12 +294,12 @@ LcpKpi: triggerbit: '' use_cuts: False cuts: - - "fCPA > 0.95" - - "fCPA > 0.95" - - "fCPA > 0.95" - - "fCPA > 0.95" - - "fCPA > 0.95" - - "fCPA > 0.95" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" # To initialize the individual fits in pT bins @@ -318,7 +317,7 @@ LcpKpi: weighttrig: false data: - runselection: [null, null] #FIXME + runselection: [null] #FIXME prefix_dir_res: /data2/MLhep/ results: [LHC22pp/Results/prod_LHC22o/resultsdata] #list of periods resultsallp: LHC22pp/Results/resultsdatatot @@ -339,10 +338,9 @@ LcpKpi: rebin: [6,6,6,6,6,6] fix_mean: [false,false,false,false,false,false] fix_sigma: [false,false,false,false,false,false] - masssecpeak: 0. # Fix mean and/or sigma FixedMean: False - SetFixGaussianSigma: [true,true,true,true,true,true] + SetFixGaussianSigma: [false,false,false,false,false,false] # Use value set for "masspeak" for initializing total fit, otherwise what is derived from MC fit is used SetInitialGaussianMean: true # Use values set for "sigmaarray" for initializing total fit (per pT bin), diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_mult.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_mult.yml new file mode 100644 index 0000000000..993fad04b9 --- /dev/null +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_LcToPKPi_mult.yml @@ -0,0 +1,387 @@ +############################################################################# +## © Copyright CERN 2018. 
All rights not expressly granted are reserved. ## +## Author: Gian.Michele.Innocenti@cern.ch ## +## This program is free software: you can redistribute it and/or modify it ## +## under the terms of the GNU General Public License as published by the ## +## Free Software Foundation, either version 3 of the License, or (at your ## +## option) any later version. This program is distributed in the hope that ## +## it will be useful, but WITHOUT ANY WARRANTY; without even the implied ## +## warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ## +## See the GNU General Public License for more details. ## +## You should have received a copy of the GNU General Public License ## +## along with this program. If not, see https://www.gnu.org/licenses/. ## +############################################################################# + +LcpKpi: + nprongs: 3 + prongformultsub: [0,0,0] + doml: true + mass: 2.286 + sel_reco_unp: "fPt>0" + sel_gen_unp: "fPt>0" + sel_cen_unp: null + sel_good_evt_unp: "fIsEventReject == 0" + sel_reco_skim: [null,null,null,null,null,null] + sel_gen_skim: [null,null,null,null,null,null] + sel_skim_binmin: [1,2,4,6,8,12] #list of nbins + sel_skim_binmax: [2,4,6,8,12,24] #list of nbins + apply_yptacccut: false + var_binning: fPt + dofullevtmerge: false + var_cand: fCandidateSelFlag + var_swap: fIsCandidateSwapped + bitmap_sel: + var_name: fFlagMc + var_name_origgen: fOriginMcGen + var_name_origrec: fOriginMcRec + var_isstd: isstd + var_ismcsignal: ismcsignal + var_ismcprompt: ismcprompt + var_ismcfd: ismcfd + var_ismcbkg: ismcbkg + var_ismcrefl: ismcref + isstd : [[1],[]] + ismcsignal: [[1],[]] + ismcprompt: [[0],[]] + ismcfd: [[1],[]] + ismcbkg: [[],[1]] + ismcrefl: [[1],[1]] + + variables: + var_all: [fIndexCollisions, fPosX, fPosY, fPosZ, fFlagMc, fCandidateSelFlag, fOriginMcRec, fIsCandidateSwapped, fY, fEta, fPt, fCpa, fCpaXY, fM, + fErrorDecayLength, fErrorDecayLengthXY, fChi2PCA, fDecayLength, fDecayLengthXY, fDecayLengthNormalised, fDecayLengthXYNormalised, + 
fImpactParameterNormalised0, fPtProng0, fImpactParameterNormalised1, fPtProng1, fImpactParameterNormalised2, fPtProng2, + fImpactParameter0, fImpactParameter1, fImpactParameter2, fErrorImpactParameter0, fErrorImpactParameter1, fErrorImpactParameter2, + fNSigTpcPi0, fNSigTpcKa0, fNSigTpcPr0, fNSigTpcPi1, fNSigTpcKa1, fNSigTpcPr1, fNSigTpcPi2, fNSigTpcKa2, fNSigTpcPr2, + fNSigTofPi0, fNSigTofKa0, fNSigTofPr0, fNSigTofPi1, fNSigTofKa1, fNSigTofPr1, fNSigTofPi2, fNSigTofKa2, fNSigTofPr2] + var_jet: [fJetPt, fJetEta, fJetPhi] + var_jetsub: [fZg, fRg, fNsd] + var_jet_match: [df, fIndexHfCand2Prong] + var_jetsub_match: [df, fIndexD0ChargedJets] + var_evt: + data: [fIndexCollisions, fIsEventReject, fNumContrib, fMultZeqNTracksPV, fMultZeqFT0A, fMultZeqFT0C, fMultFT0M, fMultZeqFV0A] + mc: [fIndexCollisions, fIndexMcCollisions, fIsEventReject, fNumContrib, fMultZeqNTracksPV, fMultZeqFT0A, fMultZeqFT0C, fMultFT0M, fMultZeqFV0A] + var_gen: [fIndexMcCollisions, fPt, fY, fFlagMc, fOriginMcGen] + var_evt_match: [df, fIndexCollisions] + var_evt_match_mc: [df, fIndexMcCollisions] + var_training: [[fImpactParameter0, fImpactParameter1, fImpactParameter2, fPtProng0, fPtProng1, fPtProng2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fPtProng0, fPtProng1, fPtProng2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fPtProng0, fPtProng1, fPtProng2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fPtProng0, fPtProng1, fPtProng2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fPtProng0, fPtProng1, fPtProng2], + [fImpactParameter0, fImpactParameter1, fImpactParameter2, fPtProng0, fPtProng1, fPtProng2]] + var_boundaries: [fDecayLength, fPt] + var_correlation: + - [fDecayLength, fChi2PCA, fCpa] + - [fPt, fPt, fPt] + var_signal: signal + var_inv_mass: fM + var_y: fY + var_evt_sel: fIsEventReject + var_cuts: + - [fPtProng0, lt, null] + - [fPtProng1, lt, null] + - [fPtProng2, lt, null] + - [fCpa, lt, null] + - [fDecayLength, lt, null] + - 
[fChi2PCA, lt, null] + + plot_options: + prob_cut_scan: + pt_prong0: + xlim: + - 0 + - 6 + pt_prong1: + xlim: + - 0 + - 6 + pt_prong2: + xlim: + - 0 + - 6 + fDecayLength: + xlim: + - 0 + - 0.05 + fChi2PCA: + xlim: + - 0 + - 0.001 + fNSigTOFPr0: + xlim: [-110, 30] + xlabel: "n\\sigma_\\mathrm{TPC}(p)0" + fNSigTOFPi0: + xlim: [-30, 110] + xlabel: "n\\sigma_\\mathrm{TOF}(\\pi)0" + fNSigTOFKa0: + xlim: [-80, 80] + xlabel: "n\\sigma_\\mathrm{TPC}(K)0" + fNSigTOFPr1: + xlim: [-110, 30] + xlabel: "n\\sigma_\\mathrm{TPC}(p)1" + fNSigTOFPi1: + xlim: [-30, 110] + xlabel: "n\\sigma_\\mathrm{TOF}(\\pi)1" + fNSigTOFKa1: + xlim: [-80, 80] + xlabel: "n\\sigma_\\mathrm{TPC}(K)1" + fNSigTOFPr2: + xlim: [-110, 30] + xlabel: "n\\sigma_\\mathrm{TPC}(p)2" + fNSigTOFPi2: + xlim: [-30, 110] + xlabel: "n\\sigma_\\mathrm{TOF}(\\pi)2" + fNSigTOFKa2: + xlim: [-80, 80] + xlabel: "n\\sigma_\\mathrm{TPC}(K)2" + eff_cut_scan: + pt_prong0: + xlim: + - 0 + - 6 + pt_prong1: + xlim: + - 0 + - 6 + pt_prong2: + xlim: + - 0 + - 6 + fDecayLength: + xlim: + - 0 + - 0.05 + fChi2PCA: + xlim: + - 0 + - 0.0001 + files_names: + namefile_unmerged_tree: AO2D.root + namefile_reco: AnalysisResultsReco.pkl + namefile_evt: AnalysisResultsEvt.pkl + namefile_evtvalroot: AnalysisResultsROOTEvtVal.root + namefile_evtorig: AnalysisResultsEvtOrig.pkl + namefile_gen: AnalysisResultsGen.pkl + namefile_reco_applieddata: AnalysisResultsRecoAppliedData.pkl + namefile_reco_appliedmc: AnalysisResultsRecoAppliedMC.pkl + namefile_mcweights: mcweights.root + treeoriginreco: 'O2hfcandlcfull' + treeorigingen: 'O2hfcandlcfullp' + treeoriginevt: 'O2hfcandlcfullev' + treeoutput: "Lctree" + histofilename: "masshisto.root" + efffilename: "effhisto.root" + respfilename: "resphisto.root" + crossfilename: "cross_section_tot.root" + + multi: + data: + nprocessesparallel: 60 + maxfiles : [-1] #list of periods + chunksizeunp : [100] #list of periods + chunksizeskim: [100] #list of periods + fracmerge : [0.05] #list of periods + seedmerge: [12] 
#list of periods + period: [LHC22o] #list of periods + select_period: [1] + prefix_dir: /data2/MLhep/ + unmerged_tree_dir: [real/LHC22o_pass4_medium/unmerged] #list of periods + pkl: [LHC22pp/period_LHC22o/pkldata] #list of periods + pkl_skimmed: [LHC22pp/period_LHC22o/pklskdata] #list of periods + pkl_skimmed_merge_for_ml: [LHC22pp/period_LHC22o/pklskmldata] #list of periods + pkl_skimmed_merge_for_ml_all: LHC22pp/mltotdata + pkl_evtcounter_all: LHC22pp/evttotdata + #select_jobs: [[hy_189959], [hy_189000]] + mcreweights: [../Analyses] + mc: + nprocessesparallel: 50 + maxfiles : [-1, -1] #list of periods + chunksizeunp : [100, 100] #list of periods + chunksizeskim: [100, 100] #list of periods + fracmerge : [1.0, 1.0] #list of periods + seedmerge: [12, 12] #list of periods + period: [LHC22b1b, LHC22b1a] #list of periods + select_period: [1, 1] + prefix_dir: /data2/MLhep/ + unmerged_tree_dir: [sim/alice/cern.ch/user/a/alihyperloop/jobs/0024, + sim/LHC22b1a_2P3PDstar/unmerged] #list of periods + pkl: [LHC22pp_mc/prod_LHC22b1b/pklmc, + LHC22pp_mc/prod_LHC22b1a/pklmc] #list of periods + pkl_skimmed: [LHC22pp_mc/prod_LHC22b1b/pklskmc, + LHC22pp_mc/prod_LHC22b1a/pklskmc] #list of periods + pkl_skimmed_merge_for_ml: [LHC22pp_mc/prod_LHC22b1b/pklskmlmc, + LHC22pp_mc/prod_LHC22b1a/pklskmlmc] #list of periods + pkl_skimmed_merge_for_ml_all: LHC22pp_mc/prod_LHC22/mltotmc + pkl_evtcounter_all: LHC22pp_mc/prod_LHC22/evttotmc + mcreweights: [../Analyses, ../Analyses] + ml: + evtsel: null + triggersel: + data: null + mc: null + + nbkg: 500000 + nsig: 500000 + equalise_sig_bkg: True + sampletagforsignal: 1 + sampletagforbkg: 0 + sel_sigml: ismcprompt == 1 + sel_bkgml: fM<2.22 or fM>2.35 + nkfolds: 5 + rnd_shuffle: 12 + rnd_splt: 12 + test_frac: 0.2 + binmin: [1,2,4,6,8,12] # must be equal to sel_skim_binmin (sel_skim_binmin bins) + binmax: [2,4,6,8,12,24] # must be equal to sel_skim_binmax (sel_skim_binmin bins) + mltype: BinaryClassification + ncorescrossval: 10 + prefix_dir_ml: 
/data2/MLhep/ + mlplot: mlplot # to be removed + mlout: mlout # to be removed + + opt: + isFONLLfromROOT: true + filename_fonll: 'data/fonll/DmesonLcPredictions_502TeV_y05_FFptDepLHCb_BRpythia8.root' # file with FONLL predictions + fonll_particle: 'hLcpkpi' + fonll_pred: 'max' # edge of the FONLL prediction + FF: 0.1281 # fragmentation fraction + sigma_MB: 57.8e-3 # Minimum Bias cross section (pp) 50.87e-3 [b], 1 for Pb-Pb + Taa: 1 # 23260 [b^-1] in 0-10% Pb-Pb, 3917 [b^-1] in 30-50% Pb-Pb, 1 for pp + BR: 6.23e-2 # branching ratio of the decay Lc -> p K- pi+ + f_prompt: 0.9 # estimated fraction of prompt candidates + bkg_data_fraction: 0.1 # fraction of real data used in the estimation + num_steps: 111 # number of steps used in efficiency and signif. estimation + bkg_function: pol2 # fit function for bkg (among TH1 predefined fit functions, e.g. expo, pol1, pol2, ...) + save_fit: True # save bkg fits with the various cuts on ML output + raahp: [1,1,1,1,1,1] # sel_skim_binmin bins + presel_gen_eff: "abs(fY) < 0.8" + #presel_gen_eff: "abs(fY) < 0.8 and abs(fPosZ) < 10" + + mlapplication: + data: + prefix_dir_app: /data2/MLhep/ + pkl_skimmed_dec: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdata] #list of periods + pkl_skimmed_decmerged: [LHC22pp/MLapplication/prod_LHC22o/skpkldecdatamerged] #list of periods + mc: + prefix_dir_app: /data2/MLhep/ + pkl_skimmed_dec: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmc, + LHC22pp_mc/MLapplication/prod_LHC22b1a/skpkldecmc,] #list of periods + pkl_skimmed_decmerged: [LHC22pp_mc/MLapplication/prod_LHC22b1b/skpkldecmcmerged, + LHC22pp_mc/MLapplication/prod_LHC22b1a/skpkldecmcmerged] #list of periods + modelname: xgboost + modelsperptbin: [xgboost_classifierLcpKpi_dfselection_fPt_1.0_2.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_2.0_4.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_4.0_6.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_6.0_8.0.sav, + xgboost_classifierLcpKpi_dfselection_fPt_8.0_12.0.sav, + 
xgboost_classifierLcpKpi_dfselection_fPt_12.0_24.0.sav] + probcutpresel: + data: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins + mc: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3] #list of nbins + probcutoptimal: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] #list of nbins + + analysis: + indexhptspectrum: -1 #kD0Kpi=0, kDplusKpipi=1, kDstarD0pi=2, kDsKKpi=3, kLctopKpi=4, kLcK0Sp=5 + fd_method: -1 #knone=0, kfc=1, kNb=2 + cctype: -1 #kpp7 + sigmav0: -1 + inputfonllpred: null + dir_general_plots: analysis_plots + + Run3analysis: + proc_type: Dhadrons_mult + useperiod: [1,1] + plotbin: [1,1,1,1,1] + usesinglebineff: null + fprompt_from_mb: true + corresp_mb_typean: null + corrEffMult: [false, false, false, false, false] + sel_binmin2: [0,1,10,30,60] #list of nbins + sel_binmax2: [9999,9,29,59,100] #list of nbins + var_binning2: fMultZeqNTracksPV + var_binning2_gen: fMultZeqNTracksPV + nbinshisto: 200 + minvaluehisto: -0.5 + maxvaluehisto: 199.5 + triggerbit: '' + use_cuts: True + cuts: + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + - "fDecayLength > 0.02 and fCpa > 0.9" + + + # To initialize the individual fits in pT bins + # Decide whether to take the sigma from MC or data for individual fits + init_fits_from: mc # data # data or mc + + sel_an_binmin: [1,2,4,6,8,12] + sel_an_binmax: [2,4,6,8,12,24] + binning_matching: [0,1,2,3,4,5] + presel_gen_eff: "abs(fY) < 0.8" + evtsel: null + triggersel: + data: null + mc: null + weighttrig: false + + data: + runselection: [null] #FIXME + prefix_dir_res: /data2/MLhep/ + results: [LHC22pp/Results/prod_LHC22o/resultsdata] #list of periods + resultsallp: LHC22pp/Results/resultsdatatot + mc: + runselection: [null, null] #FIXME + prefix_dir_res: /data2/MLhep/ + results: [LHC22pp_mc/Results/prod_LHC22b1b/resultsmc, + LHC22pp_mc/Results/prod_LHC22b1a/resultsmc] #list of periods + resultsallp: 
LHC22pp_mc/Results/prod_LHC22/resultsmctot + + mass_fit_lim: [2.14, 2.436] # region for the fit of the invariant mass distribution [GeV/c^2] + bin_width: 0.001 # bin width of the invariant mass histogram + init_fits_from: [mc,mc,mc,mc,mc,mc] # data or mc + sgnfunc: [kGaus,kGaus,kGaus,kGaus,kGaus,kGaus] + bkgfunc: [Pol2,Pol2,Pol2,Pol2,Pol2,Pol2] + masspeak: 2.286 + massmin: [2.16, 2.16, 2.16, 2.16, 2.14, 2.14] + massmax: [2.416, 2.416, 2.416, 2.436, 2.436, 2.436] + rebin: [6,6,7,8,8,8] + fix_mean: [false,false,false,false,false,false] + fix_sigma: [false,false,false,false,false,false] + # Fix mean and/or sigma + FixedMean: False + SetFixGaussianSigma: [true,true,true,true,true,true] + # Use value set for "masspeak" for initializing total fit, otherwise what is derived from MC fit is used + SetInitialGaussianMean: true + # Use values set for "sigmaarray" for initializing total fit (per pT bin), + # otherwise what is derived from MC fit is used + SetInitialGaussianSigma: [false,false,false,false,false,false] + # Max percentage deviation in sigma (from init) to be considered as a good fit + MaxPercSigmaDeviation: 0.5 + # Number of initial signal sigmas around the mean to be excluded for side-band fit + exclude_nsigma_sideband: 4 + # Number of sigmas around the mean within which the signal is integrated after the total fit has been done + nsigma_signal: 3 + dolikelihood: true + sigmaarray: [0.01,0.01,0.01,0.01,0.01,0.01] + FixedSigma: false + fitcase: Lc + latexnamehadron: "#Lambda_{c}^{pK#pi}" + latexbin2var: "n_{trkl}" + nevents: null + dodoublecross: false + dobkgfromsideband: false + + systematics: + probvariation: + useperiod: [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] #period from where to define prob cuts + ncutvar: 10 #number of looser and tighter variations + maxperccutvar: 0.25 #max diff in efficiency for loosest/tightest var + cutvarminrange: [0.70, 0.50, 0.50, 0.30, 0.30, 0.30] #Min starting point for scan + cutvarmaxrange: [0.95, 0.90, 0.90, 0.80, 0.80, 0.80] #Max starting point for scan + 
fixedmean: True #Fix mean cutvar histo to central fit + fixedsigma: True #Fix sigma cutvar histo to central fit \ No newline at end of file diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py index abfe42062b..2b9f19519d 100755 --- a/machine_learning_hep/processer.py +++ b/machine_learning_hep/processer.py @@ -132,7 +132,6 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab self.s_apply_yptacccut = datap.get("apply_yptacccut", True) #bitmap - self.b_trackcuts = datap["sel_reco_singletrac_unp"] self.b_std = datap["bitmap_sel"]["isstd"] self.b_mcsig = datap["bitmap_sel"]["ismcsignal"] self.b_mcsigprompt = datap["bitmap_sel"]["ismcprompt"] diff --git a/machine_learning_hep/processerdhadrons.py b/machine_learning_hep/processerdhadrons.py index 18cbc9b086..b19b87361f 100755 --- a/machine_learning_hep/processerdhadrons.py +++ b/machine_learning_hep/processerdhadrons.py @@ -60,8 +60,8 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, self.s_trigger = datap["analysis"][self.typean]["triggersel"][self.mcordata] self.triggerbit = datap["analysis"][self.typean]["triggerbit"] self.runlistrigger = runlisttrigger - self.s_var_evt_sel = datap["variables"].get("var_evt_sel", "is_ev_rej") - self.v_invmass = datap["variables"].get("var_inv_mass", "inv_mass") + self.s_var_evt_sel = datap["variables"].get("var_evt_sel", "fIsEventReject") + self.v_invmass = datap["variables"].get("var_inv_mass", "fM") # pylint: disable=too-many-branches def process_histomass_single(self, index): diff --git a/machine_learning_hep/processerdhadrons_mult.py b/machine_learning_hep/processerdhadrons_mult.py index f7a2a704ef..519b11fbb5 100755 --- a/machine_learning_hep/processerdhadrons_mult.py +++ b/machine_learning_hep/processerdhadrons_mult.py @@ -28,6 +28,7 @@ seldf_singlevar_inclusive, openfile from machine_learning_hep.utilities import mergerootfiles from machine_learning_hep.utilities import get_timestamp_string +from 
machine_learning_hep.utilities import fill_hist #from machine_learning_hep.globalfitter import fitter from machine_learning_hep.processer import Processer from machine_learning_hep.bitwise import filter_bit_df, tag_bit_df @@ -52,6 +53,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, p_frac_merge, p_rd_merge, d_pkl_dec, d_pkl_decmerged, d_results, typean, runlisttrigger, d_mcreweights) + self.v_invmass = datap["variables"].get("var_inv_mass", "fM") self.p_mass_fit_lim = datap["analysis"][self.typean]['mass_fit_lim'] self.p_bin_width = datap["analysis"][self.typean]['bin_width'] self.p_num_bins = int(round((self.p_mass_fit_lim[1] - self.p_mass_fit_lim[0]) / \ @@ -137,12 +139,12 @@ def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, useweightf self.minvaluehisto, self.maxvaluehisto) hVtxOutMult = TH1F('vtxout_' + label, 'vtxout_' + label, self.nbinshisto, self.minvaluehisto, self.maxvaluehisto) - df_to_keep = filter_bit_df(df_evt, 'is_ev_rej', [[], [0, 5, 6, 10, 11]]) + df_to_keep = filter_bit_df(df_evt, 'fIsEventReject', [[], [0, 5, 6, 10, 11]]) # events with reco vtx after previous selection - tag_vtx = tag_bit_df(df_to_keep, 'is_ev_rej', [[], [1, 2, 7, 12]]) + tag_vtx = tag_bit_df(df_to_keep, 'fIsEventReject', [[], [1, 2, 7, 12]]) df_no_vtx = df_to_keep[~tag_vtx.values] # events with reco zvtx > 10 cm after previous selection - df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'is_ev_rej', [[3], [1, 2, 7, 12]]) + df_bit_zvtx_gr10 = filter_bit_df(df_to_keep, 'fIsEventReject', [[3], [1, 2, 7, 12]]) if useweightfromfunc is not None: @@ -177,7 +179,10 @@ def process_histomass_single(self, index): dfevtorig = selectdfrunlist(dfevtorig, \ self.run_param[self.runlistrigger], "run_number") neventsafterrunsel = len(dfevtorig) - dfevtevtsel = dfevtorig.query(self.s_evtsel) + if self.s_evtsel is not None: + dfevtevtsel = dfevtorig.query(self.s_evtsel) + else: + dfevtevtsel = dfevtorig #validation plot for event selection neventsafterevtsel = 
len(dfevtevtsel) @@ -286,13 +291,13 @@ def process_histomass_single(self, index): self.p_mass_fit_lim[0], self.p_mass_fit_lim[1]) df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \ self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]) - fill_hist(h_invmass, df_bin.inv_mass) + fill_hist(h_invmass, df_bin[self.v_invmass]) if self.usetriggcorrfunc is not None and self.mcordata == "data": weights = self.make_weights(df_bin[self.v_var2_binning_gen], self.weightfunc, self.weighthist, self.usetriggcorrfunc) weightsinv = [1./weight for weight in weights] - fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv) + fill_hist(h_invmass_weight, df_bin[self.v_invmass], weights=weightsinv) myfile.cd() h_invmass.Write() h_invmass_weight.Write() @@ -306,15 +311,15 @@ def process_histomass_single(self, index): self.p_mass_fit_lim[0], self.p_mass_fit_lim[1]) h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins, self.p_mass_fit_lim[0], self.p_mass_fit_lim[1]) - fill_hist(h_invmass_sig, df_bin_sig.inv_mass) - fill_hist(h_invmass_refl, df_bin_refl.inv_mass) + fill_hist(h_invmass_sig, df_bin_sig[self.v_invmass]) + fill_hist(h_invmass_refl, df_bin_refl[self.v_invmass]) myfile.cd() h_invmass_sig.Write() h_invmass_refl.Write() if self.event_cand_validation is True: df_recodtrig = pd.concat(list_df_recodtrig) - df_recodtrig = df_recodtrig.query("inv_mass>%f and inv_mass<%f" % \ + df_recodtrig = df_recodtrig.query(f"{self.v_invmass}>%f and {self.v_invmass}<%f" % \ (self.mass - 0.15, self.mass + 0.15)) dfevtwithd = pd.merge(dfevtevtsel, df_recodtrig, on=self.v_evtmatch) label = "h%s" % self.v_var2_binning_gen