From 71283307a64585cdf0ea3d70df9f6343d452cd0f Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Tue, 14 Nov 2023 17:07:12 +0100 Subject: [PATCH] Fix for D0 jets --- .../database_ml_parameters_D0pp_jet.yml | 30 +++++++++++-------- machine_learning_hep/multiprocesser.py | 2 +- machine_learning_hep/processer.py | 3 +- machine_learning_hep/steer_analysis.py | 8 +++-- machine_learning_hep/utilities.py | 2 +- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml index a53013b168..f8372a4af8 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_D0pp_jet.yml @@ -143,27 +143,31 @@ D0pp_jet: fracmerge: [0.08] #list of periods seedmerge: [12] #list of periods period: [LHC22o_pass4] #list of periods - unmerged_tree_dir: [/home/nzardosh/QARun3] #list of periods - pkl: [/home/jklein/data/test/d0jet/pkl] #list of periods - pkl_skimmed: [/home/jklein/data/test/d0jet/pklsk] #list of periods - pkl_skimmed_merge_for_ml: [/home/jklein/data/test/d0jet/pklskml] #list of periods - pkl_skimmed_merge_for_ml_all: /home/jklein/data/test/d0jet/pp_data_mltot - pkl_evtcounter_all: /home/jklein/data/test/d0jet/pp_data_evttot + select_period: [0] + prefix_dir: /home/jklein/ + unmerged_tree_dir: [QARun3] #list of periods + pkl: [data/test/d0jet/pkl] #list of periods + pkl_skimmed: [data/test/d0jet/pklsk] #list of periods + pkl_skimmed_merge_for_ml: [data/test/d0jet/pklskml] #list of periods + pkl_skimmed_merge_for_ml_all: data/test/d0jet/pp_data_mltot + pkl_evtcounter_all: data/test/d0jet/pp_data_evttot mcreweights: [../Analyses] #list of periods mc: nprocessesparallel: 40 - maxfiles: [1] #list of periods + maxfiles: [5] #list of periods chunksizeunp: [100] #list of periods chunksizeskim: [1000] #list of periods fracmerge: [1.0] #list of periods seedmerge: [12] #list of periods period: [mctest] #list of periods - unmerged_tree_dir: [/home/jklein/data/alice/cern.ch/user/a/alihyperloop/jobs/0024/hy_240092] #list of periods - pkl: [/home/jklein/data/mctest/d0jet/pkl] #list of periods - pkl_skimmed: [/home/jklein/data/mctest/d0jet/pklsk] #list of periods - pkl_skimmed_merge_for_ml: [/home/jklein/data/mctest/d0jet/pklskml] #list of periods - pkl_skimmed_merge_for_ml_all: /home/jklein/data/mctest/d0jet/pp_mc_prod_mltot - pkl_evtcounter_all: /home/jklein/data/mctest/d0jet/pp_mc_prod_evttot + select_period: [1] + prefix_dir: /home/jklein/ + unmerged_tree_dir: [data/alice/cern.ch/user/a/alihyperloop/jobs/0024/hy_240092] #list of periods + pkl: [data/mctest/d0jet/pkl] #list of periods + pkl_skimmed: [data/mctest/d0jet/pklsk] #list of periods + pkl_skimmed_merge_for_ml: [data/mctest/d0jet/pklskml] #list of periods + pkl_skimmed_merge_for_ml_all: data/mctest/d0jet/pp_mc_prod_mltot + pkl_evtcounter_all: data/mctest/d0jet/pp_mc_prod_evttot mcreweights: [../Analyses] #list of periods ml: diff --git a/machine_learning_hep/multiprocesser.py b/machine_learning_hep/multiprocesser.py index c1d9d55961..9aa92d43f8 100755 --- a/machine_learning_hep/multiprocesser.py +++ b/machine_learning_hep/multiprocesser.py @@ -33,7 +33,7 @@ def __init__(self, case, proc_class, datap, typean, run_param, mcordata): self.mcordata = mcordata self.prodnumber = len(datap["multi"][self.mcordata]["unmerged_tree_dir"]) self.p_period = datap["multi"][self.mcordata]["period"] - self.select_period = datap["multi"][mcordata]["select_period"] + self.select_period = datap["multi"][self.mcordata]["select_period"] self.p_seedmerge = datap["multi"][self.mcordata]["seedmerge"] self.p_fracmerge = datap["multi"][self.mcordata]["fracmerge"] self.p_maxfiles = datap["multi"][self.mcordata]["maxfiles"] diff --git a/machine_learning_hep/processer.py b/machine_learning_hep/processer.py index 2c9855bfea..80c48d68c8 100755 --- a/machine_learning_hep/processer.py +++ b/machine_learning_hep/processer.py @@ -22,7 +22,6 @@ import os import random as rd import re -from tqdm import tqdm import uproot import pandas as pd import numpy as np @@ -317,7 +316,7 @@ def read_tree(tree, df_base, var): df_processed = set() keys = rfile.keys() - for (idx, key) in tqdm(enumerate(keys[:max_no_keys]), total=len(keys)): + for (idx, key) in enumerate(keys[:max_no_keys]): if not (df_key := re.match('^DF_(\\d+);', key)): continue diff --git a/machine_learning_hep/steer_analysis.py b/machine_learning_hep/steer_analysis.py index 7e25043a1d..45cb003c70 100644 --- a/machine_learning_hep/steer_analysis.py +++ b/machine_learning_hep/steer_analysis.py @@ -337,9 +337,11 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite proc_class = ProcesserJets ana_class = AnalyzerJets - mymultiprocessmc = MultiProcesser(case, proc_class, data_param[case], typean, run_param, "mc") - mymultiprocessdata = MultiProcesser(case, proc_class, data_param[case], typean, run_param,\ - "data") + mymultiprocessmc = MultiProcesser( + case, proc_class, data_param[case], typean, run_param, "mc") + mymultiprocessdata = MultiProcesser( + case, proc_class, data_param[case], typean, run_param, "data") + ana_mgr = AnalyzerManager(ana_class, data_param[case], case, typean, doanaperperiod) analyzers = ana_mgr.get_analyzers() diff --git a/machine_learning_hep/utilities.py b/machine_learning_hep/utilities.py index 989a93d353..a305f777d4 100644 --- a/machine_learning_hep/utilities.py +++ b/machine_learning_hep/utilities.py @@ -153,7 +153,7 @@ def list_folders(main_dir, filenameinput, maxfiles, select=None): # pylint: disa List all files in a subdirectory structure """ if not os.path.isdir(main_dir): - print("the input directory =", main_dir, "does not exist") + logger.error("the input directory = <%s> does not exist", main_dir) list_subdir0 = os.listdir(main_dir) listfolders = [] for subdir0 in list_subdir0: # pylint: disable=too-many-nested-blocks