From 09d25efbb93456140a56ac1c5cf702672f329f44 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com> Date: Thu, 14 Dec 2023 10:51:24 +0100 Subject: [PATCH] Removing legacy style to run an analysis (#327) * Removing legacy style to run an analysis * Fix batch print * Batch mode for root * Remove runValidate --- addons/FastJet/python/jetClusteringHelper.py | 1 + addons/ONNXRuntime/python/jetFlavourHelper.py | 3 +- bin/fccanalysis | 34 +++- .../mH-recoil/mumu/analysis_stage1_batch.py | 2 +- python/FCCAnalysisRun.py | 160 ++++-------------- 5 files changed, 66 insertions(+), 134 deletions(-) diff --git a/addons/FastJet/python/jetClusteringHelper.py b/addons/FastJet/python/jetClusteringHelper.py index 5618974006..ce814427c1 100644 --- a/addons/FastJet/python/jetClusteringHelper.py +++ b/addons/FastJet/python/jetClusteringHelper.py @@ -1,6 +1,7 @@ import json import ROOT +ROOT.gROOT.SetBatch(True) class ExclusiveJetClusteringHelper: def __init__(self, coll, njets, tag=""): diff --git a/addons/ONNXRuntime/python/jetFlavourHelper.py b/addons/ONNXRuntime/python/jetFlavourHelper.py index d40b67e519..a802783235 100644 --- a/addons/ONNXRuntime/python/jetFlavourHelper.py +++ b/addons/ONNXRuntime/python/jetFlavourHelper.py @@ -1,7 +1,8 @@ +import sys import json import ROOT -import sys +ROOT.gROOT.SetBatch(True) class JetFlavourHelper: def __init__(self, coll, jet, jetc, tag=""): diff --git a/bin/fccanalysis b/bin/fccanalysis index 75c8640042..cf466e0c2a 100755 --- a/bin/fccanalysis +++ b/bin/fccanalysis @@ -31,6 +31,8 @@ class MultiLineFormatter(logging.Formatter): def main(): parser = argparse.ArgumentParser(description='FCCAnalyses v0.8.0') + + # Verbosity settings verbosity_argument_group = parser.add_mutually_exclusive_group() verbosity_argument_group.add_argument('-v', '--verbose', action='store_true', @@ -42,16 +44,32 @@ def main(): action='store_true', help='make output most verbose') - subparsers = parser.add_subparsers(help='types of running modes', dest='command') - parser_init = subparsers.add_parser('init', help="generate a RDataFrame based FCC analysis") - parser_build = subparsers.add_parser('build', help='build and install local analysis') + # Create sub-parsers + subparsers = parser.add_subparsers(help='types of running modes', + dest='command') + parser_init = subparsers.add_parser( + 'init', + help="generate a RDataFrame based FCC analysis") + parser_build = subparsers.add_parser( + 'build', + help='build and install local analysis') parser_test = subparsers.add_parser( - 'test', help='test whole or a part of the analysis framework') - parser_pin = subparsers.add_parser('pin', help='pin fccanalyses to the current version of Key4hep stack') - parser_run = subparsers.add_parser('run', help="run a RDataFrame based FCC analysis") - parser_run_final = subparsers.add_parser('final', help="run a RDataFrame based FCC analysis final configuration") - parser_run_plots = subparsers.add_parser('plots', help="run a RDataFrame based FCC analysis plot configuration") + 'test', + help='test whole or a part of the analysis framework') + parser_pin = subparsers.add_parser( + 'pin', + help='pin fccanalyses to the current version of Key4hep stack') + parser_run = subparsers.add_parser( + 'run', + help="run a RDataFrame based FCC analysis") + parser_run_final = subparsers.add_parser( + 'final', + help="run a RDataFrame based FCC analysis final configuration") + parser_run_plots = subparsers.add_parser( + 'plots', + help="run a RDataFrame based FCC analysis plot configuration") + # Setup sub-parsers import Parsers as fccpars fccpars.setup_init_parser(parser_init) fccpars.setup_build_parser(parser_build) diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py index 6fcc5ab5a4..ca02b653fe 100644 --- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py +++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py @@ -8,7 +8,7 @@ #Mandatory: Production tag when running over EDM4Hep centrally produced events, this points to the yaml files for getting sample statistics prodTag = "FCCee/spring2021/IDEA/" -#Optional: output directory, default is local dir +#Optional: output directory, default is local running dir outputDir = "ZH_mumu_recoil_batch/stage1" #Optional: ncpus, default is 4 diff --git a/python/FCCAnalysisRun.py b/python/FCCAnalysisRun.py index 29017ad79a..9d42631079 100644 --- a/python/FCCAnalysisRun.py +++ b/python/FCCAnalysisRun.py @@ -1,5 +1,5 @@ -import ROOT -import os, sys +import os +import sys import time import yaml import glob @@ -7,16 +7,16 @@ import logging import subprocess import importlib.util -from array import array import datetime import numpy as np +import ROOT from anafile import getElement, getElementDict from process import getProcessInfo, get_process_dict - LOGGER = logging.getLogger('FCCAnalyses.run') +ROOT.gROOT.SetBatch(True) # __________________________________________________________ def get_entries(infilepath): @@ -308,7 +308,7 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile): frun_condor.write('Error = {}/condor_job.{}.$(ClusterId).$(ProcId).error\n'.format(logDir,process)) frun_condor.write('getenv = False\n') frun_condor.write('environment = "LS_SUBCWD={}"\n'.format(logDir)) # not sure - frun_condor.write('requirements = ( (OpSysAndVer =?= "CentOS7") && (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n') + frun_condor.write('requirements = ( (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n') frun_condor.write('on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)\n') frun_condor.write('max_retries = 3\n') frun_condor.write('+JobFlavour = "{}"\n'.format(getElement(rdfModule, "batchQueue"))) @@ -318,8 +318,8 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile): frun_condor.close() cmdBatch="condor_submit {}".format(frunfull_condor) - LOGGER.info('Batch command: ', cmdBatch) - job=SubmitToCondor(cmdBatch,10) + LOGGER.info('Batch command: %s', cmdBatch) + job=SubmitToCondor(cmdBatch, 10) #__________________________________________________________ @@ -1040,71 +1040,33 @@ def runHistmaker(args, rdfModule, analysisFile): LOGGER.info(info_msg) -#__________________________________________________________ +# __________________________________________________________ def runPlots(analysisFile): import doPlots as dp dp.run(analysisFile) -#__________________________________________________________ -def runValidate(jobdir): - listdir=os.listdir(jobdir) - if jobdir[-1]!="/":jobdir+="/" - for dir in listdir: - if not os.path.isdir(jobdir+dir): continue - listfile=glob.glob(jobdir+dir+"/*.sh") - for file in listfile: - with open(file) as f: - for line in f: - pass - lastLine = line - LOGGER.info(line) - -#__________________________________________________________ -def setup_run_parser(parser): - publicOptions = parser.add_argument_group('User options') - publicOptions.add_argument('anafile_path', help="path to analysis script") - publicOptions.add_argument("--files-list", help="Specify input file to bypass the processList", default=[], nargs='+') - publicOptions.add_argument("--output", help="Specify output file name to bypass the processList and or outputList, default output.root", type=str, default="output.root") - publicOptions.add_argument("--nevents", help="Specify max number of events to process", type=int, default=-1) - publicOptions.add_argument("--test", action='store_true', help="Run over the test file", default=False) - publicOptions.add_argument('--bench', action='store_true', help='Output benchmark results to a JSON file', default=False) - publicOptions.add_argument("--ncpus", help="Set number of threads", type=int) - publicOptions.add_argument("--final", action='store_true', help="Run final analysis (produces final histograms and trees)", default=False) - publicOptions.add_argument("--plots", action='store_true', help="Run analysis plots", default=False) - publicOptions.add_argument("--preprocess", action='store_true', help="Run preprocessing", default=False) - publicOptions.add_argument("--validate", action='store_true', help="Validate a given production", default=False) - publicOptions.add_argument("--rerunfailed", action='store_true', help="Rerun failed jobs", default=False) - publicOptions.add_argument("--jobdir", help="Specify the batch job directory", type=str, default="output.root") - - internalOptions = parser.add_argument_group('\033[4m\033[1m\033[91m Internal options, NOT FOR USERS\033[0m') - internalOptions.add_argument("--batch", action='store_true', help="Submit on batch", default=False) +def run(parser): + ''' + Set things in motion. + ''' + args, _ = parser.parse_known_args() -#__________________________________________________________ -def run(mainparser, subparser=None): - """ - Set things in motion. - The two parser arguments are a hack to allow running this - both as `fccanalysis run` and `python config/FCCAnalysisRun.py` - For the latter case, both are the same (see below). - """ - - if subparser: - setup_run_parser(subparser) - args, _ = mainparser.parse_known_args() - #check that the analysis file exists + # Check that the analysis file exists analysisFile = args.anafile_path if not os.path.isfile(analysisFile): - LOGGER.error('Script %s does not exist!\nSpecify a valid analysis ' - 'script in the command line arguments', analysisFile) + LOGGER.error('Analysis script %s not found!\nAborting...', + analysisFile) sys.exit(3) + # Load pre compiled analyzers LOGGER.info('Loading analyzers from libFCCAnalyses...') ROOT.gSystem.Load("libFCCAnalyses") # Is this still needed?? 01/04/2022 still to be the case _fcc = ROOT.dummyLoader + # Set verbosity level if args.verbose: # ROOT.Experimental.ELogLevel.kInfo verbosity level is more # equivalent to DEBUG in other log systems @@ -1131,75 +1093,25 @@ def run(mainparser, subparser=None): rdfModule = importlib.util.module_from_spec(rdfSpec) rdfSpec.loader.exec_module(rdfModule) - if hasattr(args, 'command'): - if args.command == "run": - if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"): - LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" ' - 'class and "build_graph" function are defined.') - sys.exit(3) - elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"): - runHistmaker(args, rdfModule, analysisFile) - elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"): - runStages(args, rdfModule, args.preprocess, analysisFile) - else: - LOGGER.error('Analysis file does not contain required ' - 'objects!\nProvide either "RDFanalysis" class or ' - '"build_graph" function.') - sys.exit(3) - elif args.command == "final": - runFinal(rdfModule) - elif args.command == "plots": - runPlots(analysisFile) - return - - LOGGER.warning('Running the old way...\nThis way of running the analysis ' - 'is deprecated and will be removed in the next release!\n' - 'The FCCAnalyses release 0.8.0 is the last one to support ' - 'this stile of running!') - - # below is legacy using the old way of runnig with options in - # "python config/FCCAnalysisRun.py analysis.py --options check if this is - # final analysis - if args.final: - if args.plots: - LOGGER.error('Can not have --plots with --final, exit') - sys.exit(3) - if args.preprocess: - LOGGER.error('Can not have --preprocess with --final, exit') - sys.exit(3) - runFinal(rdfModule) + if not hasattr(args, 'command'): + LOGGER.error('Unknow sub-command "%s"!\nAborting...') + sys.exit(3) - elif args.plots: - if args.final: - LOGGER.error('Can not have --final with --plots, exit') + if args.command == "run": + if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"): + LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" ' + 'class and "build_graph" function are defined.') sys.exit(3) - if args.preprocess: - LOGGER.error('Can not have --preprocess with --plots, exit') + elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"): + runHistmaker(args, rdfModule, analysisFile) + elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"): + runStages(args, rdfModule, args.preprocess, analysisFile) + else: + LOGGER.error('Analysis file does not contain required ' + 'objects!\nProvide either "RDFanalysis" class or ' + '"build_graph" function.') sys.exit(3) + elif args.command == "final": + runFinal(rdfModule) + elif args.command == "plots": runPlots(analysisFile) - - elif args.validate: - runValidate(args.jobdir) - - else: - if args.preprocess: - if args.plots: - LOGGER.error('Can not have --plots with --preprocess, exit') - sys.exit(3) - if args.final: - LOGGER.error('Can not have --final with --preprocess, exit') - sys.exit(3) - runStages(args, rdfModule, args.preprocess, analysisFile) - - -# __________________________________________________________ -if __name__ == "__main__": - LOGGER.warning('Running this script directly is deprecated, use ' - '`fccanalysis run` instead. FCCAnalyses release 0.8.0 is ' - 'the last one to support this style of running!') - # legacy behavior: allow running this script directly - # with python config/FCCAnalysis.py - # and the same behavior as `fccanalysis run` - import argparse - parser = argparse.ArgumentParser() - run(parser, parser)