From 09d25efbb93456140a56ac1c5cf702672f329f44 Mon Sep 17 00:00:00 2001
From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com>
Date: Thu, 14 Dec 2023 10:51:24 +0100
Subject: [PATCH] Removing legacy style to run an analysis (#327)

* Removing legacy style to run an analysis

* Fix batch print

* Batch mode for root

* Remove runValidate
---
 addons/FastJet/python/jetClusteringHelper.py  |   1 +
 addons/ONNXRuntime/python/jetFlavourHelper.py |   3 +-
 bin/fccanalysis                               |  34 +++-
 .../mH-recoil/mumu/analysis_stage1_batch.py   |   2 +-
 python/FCCAnalysisRun.py                      | 160 ++++--------------
 5 files changed, 66 insertions(+), 134 deletions(-)

diff --git a/addons/FastJet/python/jetClusteringHelper.py b/addons/FastJet/python/jetClusteringHelper.py
index 5618974006..ce814427c1 100644
--- a/addons/FastJet/python/jetClusteringHelper.py
+++ b/addons/FastJet/python/jetClusteringHelper.py
@@ -1,6 +1,7 @@
 import json
 import ROOT
 
+ROOT.gROOT.SetBatch(True)
 
 class ExclusiveJetClusteringHelper:
     def __init__(self, coll, njets, tag=""):
diff --git a/addons/ONNXRuntime/python/jetFlavourHelper.py b/addons/ONNXRuntime/python/jetFlavourHelper.py
index d40b67e519..a802783235 100644
--- a/addons/ONNXRuntime/python/jetFlavourHelper.py
+++ b/addons/ONNXRuntime/python/jetFlavourHelper.py
@@ -1,7 +1,8 @@
+import sys
 import json
 import ROOT
-import sys
 
+ROOT.gROOT.SetBatch(True)
 
 class JetFlavourHelper:
     def __init__(self, coll, jet, jetc, tag=""):
diff --git a/bin/fccanalysis b/bin/fccanalysis
index 75c8640042..cf466e0c2a 100755
--- a/bin/fccanalysis
+++ b/bin/fccanalysis
@@ -31,6 +31,8 @@ class MultiLineFormatter(logging.Formatter):
 
 def main():
     parser = argparse.ArgumentParser(description='FCCAnalyses v0.8.0')
+
+    # Verbosity settings
     verbosity_argument_group = parser.add_mutually_exclusive_group()
     verbosity_argument_group.add_argument('-v', '--verbose',
                                           action='store_true',
@@ -42,16 +44,32 @@ def main():
                                           action='store_true',
                                           help='make output most verbose')
 
-    subparsers = parser.add_subparsers(help='types of running modes', dest='command')
-    parser_init      = subparsers.add_parser('init', help="generate a RDataFrame based FCC analysis")
-    parser_build     = subparsers.add_parser('build', help='build and install local analysis')
+    # Create sub-parsers
+    subparsers = parser.add_subparsers(help='types of running modes',
+                                       dest='command')
+    parser_init = subparsers.add_parser(
+        'init',
+        help="generate a RDataFrame based FCC analysis")
+    parser_build = subparsers.add_parser(
+        'build',
+        help='build and install local analysis')
     parser_test = subparsers.add_parser(
-        'test', help='test whole or a part of the analysis framework')
-    parser_pin       = subparsers.add_parser('pin', help='pin fccanalyses to the current version of Key4hep stack')
-    parser_run       = subparsers.add_parser('run', help="run a RDataFrame based FCC analysis")
-    parser_run_final = subparsers.add_parser('final', help="run a RDataFrame based FCC analysis final configuration")
-    parser_run_plots = subparsers.add_parser('plots', help="run a RDataFrame based FCC analysis plot configuration")
+        'test',
+        help='test whole or a part of the analysis framework')
+    parser_pin = subparsers.add_parser(
+        'pin',
+        help='pin fccanalyses to the current version of Key4hep stack')
+    parser_run = subparsers.add_parser(
+        'run',
+        help="run a RDataFrame based FCC analysis")
+    parser_run_final = subparsers.add_parser(
+        'final',
+        help="run a RDataFrame based FCC analysis final configuration")
+    parser_run_plots = subparsers.add_parser(
+        'plots',
+        help="run a RDataFrame based FCC analysis plot configuration")
 
+    # Setup sub-parsers
     import Parsers as fccpars
     fccpars.setup_init_parser(parser_init)
     fccpars.setup_build_parser(parser_build)
diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py
index 6fcc5ab5a4..ca02b653fe 100644
--- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py
+++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1_batch.py
@@ -8,7 +8,7 @@
 #Mandatory: Production tag when running over EDM4Hep centrally produced events, this points to the yaml files for getting sample statistics
 prodTag     = "FCCee/spring2021/IDEA/"
 
-#Optional: output directory, default is local dir
+#Optional: output directory, default is local running dir
 outputDir   = "ZH_mumu_recoil_batch/stage1"
 
 #Optional: ncpus, default is 4
diff --git a/python/FCCAnalysisRun.py b/python/FCCAnalysisRun.py
index 29017ad79a..9d42631079 100644
--- a/python/FCCAnalysisRun.py
+++ b/python/FCCAnalysisRun.py
@@ -1,5 +1,5 @@
-import ROOT
-import os, sys
+import os
+import sys
 import time
 import yaml
 import glob
@@ -7,16 +7,16 @@
 import logging
 import subprocess
 import importlib.util
-from array import array
 import datetime
 import numpy as np
 
+import ROOT
 from anafile import getElement, getElementDict
 from process import getProcessInfo, get_process_dict
 
-
 LOGGER = logging.getLogger('FCCAnalyses.run')
 
+ROOT.gROOT.SetBatch(True)
 
 # __________________________________________________________
 def get_entries(infilepath):
@@ -308,7 +308,7 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile):
     frun_condor.write('Error            = {}/condor_job.{}.$(ClusterId).$(ProcId).error\n'.format(logDir,process))
     frun_condor.write('getenv           = False\n')
     frun_condor.write('environment      = "LS_SUBCWD={}"\n'.format(logDir)) # not sure
-    frun_condor.write('requirements     = ( (OpSysAndVer =?= "CentOS7") && (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n')
+    frun_condor.write('requirements     = ( (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n')
     frun_condor.write('on_exit_remove   = (ExitBySignal == False) && (ExitCode == 0)\n')
     frun_condor.write('max_retries      = 3\n')
     frun_condor.write('+JobFlavour      = "{}"\n'.format(getElement(rdfModule, "batchQueue")))
@@ -318,8 +318,8 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile):
     frun_condor.close()
 
     cmdBatch="condor_submit {}".format(frunfull_condor)
-    LOGGER.info('Batch command: ', cmdBatch)
-    job=SubmitToCondor(cmdBatch,10)
+    LOGGER.info('Batch command: %s', cmdBatch)
+    job=SubmitToCondor(cmdBatch, 10)
 
 
 #__________________________________________________________
@@ -1040,71 +1040,33 @@ def runHistmaker(args, rdfModule, analysisFile):
     LOGGER.info(info_msg)
 
 
-#__________________________________________________________
+# __________________________________________________________
 def runPlots(analysisFile):
     import doPlots as dp
     dp.run(analysisFile)
 
-#__________________________________________________________
-def runValidate(jobdir):
-    listdir=os.listdir(jobdir)
-    if jobdir[-1]!="/":jobdir+="/"
-    for dir in listdir:
-        if not os.path.isdir(jobdir+dir): continue
-        listfile=glob.glob(jobdir+dir+"/*.sh")
-        for file in listfile:
-            with open(file) as f:
-                for line in f:
-                    pass
-                lastLine = line
-            LOGGER.info(line)
-
 
-#__________________________________________________________
-def setup_run_parser(parser):
-    publicOptions = parser.add_argument_group('User options')
-    publicOptions.add_argument('anafile_path', help="path to analysis script")
-    publicOptions.add_argument("--files-list", help="Specify input file to bypass the processList", default=[], nargs='+')
-    publicOptions.add_argument("--output", help="Specify output file name to bypass the processList and or outputList, default output.root", type=str, default="output.root")
-    publicOptions.add_argument("--nevents", help="Specify max number of events to process", type=int, default=-1)
-    publicOptions.add_argument("--test", action='store_true', help="Run over the test file", default=False)
-    publicOptions.add_argument('--bench', action='store_true', help='Output benchmark results to a JSON file', default=False)
-    publicOptions.add_argument("--ncpus", help="Set number of threads", type=int)
-    publicOptions.add_argument("--final", action='store_true', help="Run final analysis (produces final histograms and trees)", default=False)
-    publicOptions.add_argument("--plots", action='store_true', help="Run analysis plots", default=False)
-    publicOptions.add_argument("--preprocess", action='store_true', help="Run preprocessing", default=False)
-    publicOptions.add_argument("--validate", action='store_true', help="Validate a given production", default=False)
-    publicOptions.add_argument("--rerunfailed", action='store_true', help="Rerun failed jobs", default=False)
-    publicOptions.add_argument("--jobdir", help="Specify the batch job directory", type=str, default="output.root")
-
-    internalOptions = parser.add_argument_group('\033[4m\033[1m\033[91m Internal options, NOT FOR USERS\033[0m')
-    internalOptions.add_argument("--batch", action='store_true', help="Submit on batch", default=False)
+def run(parser):
+    '''
+    Set things in motion.
+    '''
 
+    args, _ = parser.parse_known_args()
 
-#__________________________________________________________
-def run(mainparser, subparser=None):
-    """
-    Set things in motion.
-    The two parser arguments are a hack to allow running this
-    both as `fccanalysis run` and `python config/FCCAnalysisRun.py`
-    For the latter case, both are the same (see below).
-    """
-
-    if subparser:
-        setup_run_parser(subparser)
-    args, _ = mainparser.parse_known_args()
-    #check that the analysis file exists
+    # Check that the analysis file exists
     analysisFile = args.anafile_path
     if not os.path.isfile(analysisFile):
-        LOGGER.error('Script %s does not exist!\nSpecify a valid analysis '
-                     'script in the command line arguments', analysisFile)
+        LOGGER.error('Analysis script %s not found!\nAborting...',
+                     analysisFile)
         sys.exit(3)
 
+    # Load pre compiled analyzers
     LOGGER.info('Loading analyzers from libFCCAnalyses...')
     ROOT.gSystem.Load("libFCCAnalyses")
     # Is this still needed?? 01/04/2022 still to be the case
     _fcc = ROOT.dummyLoader
 
+    # Set verbosity level
     if args.verbose:
         # ROOT.Experimental.ELogLevel.kInfo verbosity level is more
         # equivalent to DEBUG in other log systems
@@ -1131,75 +1093,25 @@ def run(mainparser, subparser=None):
     rdfModule = importlib.util.module_from_spec(rdfSpec)
     rdfSpec.loader.exec_module(rdfModule)
 
-    if hasattr(args, 'command'):
-        if args.command == "run":
-            if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
-                LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" '
-                             'class and "build_graph" function are defined.')
-                sys.exit(3)
-            elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"):
-                runHistmaker(args, rdfModule, analysisFile)
-            elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
-                runStages(args, rdfModule, args.preprocess, analysisFile)
-            else:
-                LOGGER.error('Analysis file does not contain required '
-                             'objects!\nProvide either "RDFanalysis" class or '
-                             '"build_graph" function.')
-                sys.exit(3)
-        elif args.command == "final":
-            runFinal(rdfModule)
-        elif args.command == "plots":
-            runPlots(analysisFile)
-        return
-
-    LOGGER.warning('Running the old way...\nThis way of running the analysis '
-                   'is deprecated and will be removed in the next release!\n'
-                   'The FCCAnalyses release 0.8.0 is the last one to support '
-                   'this stile of running!')
-
-    # below is legacy using the old way of runnig with options in
-    # "python config/FCCAnalysisRun.py analysis.py --options check if this is
-    # final analysis
-    if args.final:
-        if args.plots:
-            LOGGER.error('Can not have --plots with --final, exit')
-            sys.exit(3)
-        if args.preprocess:
-            LOGGER.error('Can not have --preprocess with --final, exit')
-            sys.exit(3)
-        runFinal(rdfModule)
+    if not hasattr(args, 'command'):
+        LOGGER.error('Unknow sub-command "%s"!\nAborting...')
+        sys.exit(3)
 
-    elif args.plots:
-        if args.final:
-            LOGGER.error('Can not have --final with --plots, exit')
+    if args.command == "run":
+        if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
+            LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" '
+                         'class and "build_graph" function are defined.')
             sys.exit(3)
-        if args.preprocess:
-            LOGGER.error('Can not have --preprocess with --plots, exit')
+        elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"):
+            runHistmaker(args, rdfModule, analysisFile)
+        elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
+            runStages(args, rdfModule, args.preprocess, analysisFile)
+        else:
+            LOGGER.error('Analysis file does not contain required '
+                         'objects!\nProvide either "RDFanalysis" class or '
+                         '"build_graph" function.')
             sys.exit(3)
+    elif args.command == "final":
+        runFinal(rdfModule)
+    elif args.command == "plots":
         runPlots(analysisFile)
-
-    elif args.validate:
-        runValidate(args.jobdir)
-
-    else:
-        if args.preprocess:
-            if args.plots:
-                LOGGER.error('Can not have --plots with --preprocess, exit')
-                sys.exit(3)
-            if args.final:
-                LOGGER.error('Can not have --final with --preprocess, exit')
-                sys.exit(3)
-        runStages(args, rdfModule, args.preprocess, analysisFile)
-
-
-# __________________________________________________________
-if __name__ == "__main__":
-    LOGGER.warning('Running this script directly is deprecated, use '
-                   '`fccanalysis run` instead. FCCAnalyses release 0.8.0 is '
-                   'the last one to support this style of running!')
-    # legacy behavior: allow running this script directly
-    # with python config/FCCAnalysis.py
-    # and the same behavior as `fccanalysis run`
-    import argparse
-    parser = argparse.ArgumentParser()
-    run(parser, parser)