From 6a0580c6d97bf99531e8fb952170d2bf3d4f10b1 Mon Sep 17 00:00:00 2001 From: Louis Moureaux Date: Tue, 3 Jul 2018 15:09:22 +0200 Subject: [PATCH] Add man page for clusterAnaScurve.py See PR #123 --- doc/conf.py | 2 + doc/macros.rst | 6 +- doc/man/clusterAnaScurve.rst | 4 + macros/clusterAnaScurve.py | 270 +++++++++++++++++++++++++++++++++-- mapping/chamberInfo.py | 1 + 5 files changed, 269 insertions(+), 14 deletions(-) create mode 100644 doc/man/clusterAnaScurve.rst diff --git a/doc/conf.py b/doc/conf.py index 3ce160e2..aa656fe0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -244,6 +244,8 @@ authors, 1), ('man/packageFiles4Docker', 'packageFiles4Docker.py', u'Creates a tarball containing data', authors, 1), + ('man/clusterAnaScurve', 'clusterAnaScurve.py', u'Analyze S-curves using the LSF cluster', + authors, 1), ] # If true, show URL addresses after external links. diff --git a/doc/macros.rst b/doc/macros.rst index e06fda7d..4de7f2a9 100644 --- a/doc/macros.rst +++ b/doc/macros.rst @@ -31,14 +31,10 @@ Macros :undoc-members: :show-inheritance: -.. automodule:: clusterAnaScurve - :members: - :undoc-members: - :show-inheritance: - .. toctree:: :maxdepth: 1 + man/clusterAnaScurve man/gemPlotter .. automodule:: gemSCurveAnaToolkit diff --git a/doc/man/clusterAnaScurve.rst b/doc/man/clusterAnaScurve.rst new file mode 100644 index 00000000..a2cebc4b --- /dev/null +++ b/doc/man/clusterAnaScurve.rst @@ -0,0 +1,4 @@ +.. 
automodule:: clusterAnaScurve + :members: + :undoc-members: + :show-inheritance: diff --git a/macros/clusterAnaScurve.py b/macros/clusterAnaScurve.py index a2764888..5bc167d5 100755 --- a/macros/clusterAnaScurve.py +++ b/macros/clusterAnaScurve.py @@ -1,17 +1,269 @@ #!/bin/env python -""" -clusterAnaScurve -================ +r""" +``clusterAnaScurve.py`` --- Analyze S-curves using the LSF cluster +================================================================== + +Synopsis +-------- + +**clusterAnaScurve.py** :token:`-q` <*QUEUE*> :token:`-t` *long|short* :token:`--anaType` *scurve|trim* (:token:`--chamberName` <*NAME*> | :token:`-i` <*FILE*>) + +Description +----------- + +This tool will allow you to re-analyze the scurve data in a straightforward way +without the time consuming process of launching it by hand. Takes a list of +scandates file in the :any:`Two Column Format`, and launches a job for each +``(chamberName, scandate)`` pair. Each job will launch +:program:`anaUltraScurve.py`. + +Mandatory arguments +------------------- + +The following list shows the mandatory inputs that must be supplied to execute +the script. + +.. program:: clusterAnaScurve.py + +.. option:: --anaType scurve|trim + + Analysis type to be executed. + +.. option:: --chamberName + + Name of detector to be analyzed, from the values in + :py:data:`gempython.gemplotting.mapping.chamberInfo.chamber_config`. Either + this option or :option:`--infilename` must be supplied. + +.. option:: -i, --infilename + + Physical file name of the input file. The format of this input file should + follow the :any:`Two Column Format`. Either this option or + :option:`--chamberName` must be supplied. + +.. option:: -q, --queue + + Queue to submit your jobs to. Suggested options are ``8nm``, ``1nh`` and + ``1nd``. + +.. option:: -t, --type long|short + + Specify GEB/detector type. + +Optional arguments +------------------ + +.. option:: --calFile + + File specifying CAL_DAC/VCAL to fC equations per VFAT. 
If this is not + provided the analysis will default to hardcoded conversion for VFAT2. See + :py:class:`gempython.gemplotting.fitting.fitScanData.ScanDataFitter` for + more information. + +.. option:: -c, --channels + + Output plots will be made vs VFAT channel instead of ROB strip. + +.. option:: -d, --debug + + If provided all cluster files will be created for inspection, and job + submission commands printed to terminal, but no jobs will be submitted to + the cluster. Using this option before submitting a large number of jobs is + strongly recommended. + +.. option:: --endDate + + If :option:`--infilename` is not supplied this is the ending scandate, in + ``YYYY.MM.DD`` format, to be considered for job submission. The default is + whatever ``datetime.today()`` evaluates to. + +.. option:: --extChanMapping + + Physical file name of a custom, non-default, channel mapping file. If not + provided the default slice test ROB strip to VFAT channel mapping will be + used. + +.. option:: -f, --fit + + Fit S-curves and save fit information to output ``TFile``. + +.. option:: -p, --panasonic + + Output plots will be made vs Panasonic pins instead of ROB strip. + +.. option:: --startDate + + If :option:`--infilename` is not supplied this is the starting scandate, in + YYYY.MM.DD format, to be considered for job submission. Default is + ``2017.01.01`` so the start of the slice test will be used. + +.. option:: --zscore + + Z-Score for Outlier Identification in the MAD Algorithm. For details see + talks by `B. Dorney`_ or `L. Moureaux`_. + + .. _B. Dorney: https://indico.cern.ch/event/638404/contributions/2643292/attachments/1483873/2302543/BDorney_OpsMtg_20170627.pdf + + .. _L. Moureaux: https://indico.cern.ch/event/659794/contributions/2691237/attachments/1508531/2351619/UpdateOnHotChannelIdentificationAlgo.pdf + +.. 
option:: --ztrim + + Specify the :math:`P`-value of the trim in the quantity: + ``scurve_mean - ztrim * scurve_sigma`` + +Finally :program:`clusterAnaScurve.py` can also be passed the cut values used in +assigning a ``maskReason`` described at +:any:`Providing Cuts for ``maskReason`` at Runtime`. + +Full Example For P5 S-Curve Data +-------------------------------- + +Before you start, due to space limitations on AFS, it is strongly recommended that +your :envvar:`DATA_PATH` variable on lxplus point to the work area rather than +the user area, e.g.: + +.. code-block:: bash + + export DATA_PATH=/afs/cern.ch/work/<first-letter-of-your-username>/<your-user-name>/<somepath> + +In your work area you can have up to 100GB of space. If this is your first time +using ``lxplus`` you may want to increase your storage quota by following +instructions `here`_. + +.. _here: https://resources.web.cern.ch/resources/Help/?kbid=067040 + +Now connect to the P5 ``dqm`` machine. Then if you are interested in a chamber +``ChamberName`` execute: + +.. code-block:: bash + + cd $HOME + plotTimeSeries.py --listOfScanDatesOnly --startDate=2017.01.01 + packageFiles4Docker.py --ignoreFailedReads --fileListScurve=/gemdata/<ChamberName>/scurve/listOfScanDates.txt --tarBallName=<ChamberName>_scurves.tar --ztrim=4 --onlyRawData + +Then connect to ``lxplus``, and after setting up the env execute: + +.. code-block:: bash + + cd $DATA_PATH + scp <your-user-name>@cmsusr.cms:/nfshome0/<your-user-name>/<ChamberName>_scurves.tar . + tar -xf <ChamberName>_scurves.tar + mv gemdata/<ChamberName> . + clusterAnaScurve.py -i <ChamberName>/scurve/listOfScanDates.txt --anaType=scurve -f -q 1nh + +It may take some time to finish the job submission. Please pay attention to the +output at the end of the :program:`clusterAnaScurve.py` command as it provides +helpful information for managing jobs and understanding what comes next. Once +your jobs are complete you should check that they all finished successfully. One +way to do this is to check if any of them exited with status ``Exited`` and +check for the exit code. To do this execute: + +.. 
code-block:: bash + + grep -R "exit code" <ChamberName>/scurve/*/stdout/jobOut.txt --color + +This will print a single line from all files where the string ``exit code`` appears. +For example: + + GEMINIm01L1/scurve/2017.04.10.20.33/stdout/jobOut.txt:Exited with exit code 255. + GEMINIm01L1/scurve/2017.04.26.12.25/stdout/jobOut.txt:Exited with exit code 255. + GEMINIm01L1/scurve/2017.04.27.13.27/stdout/jobOut.txt:Exited with exit code 255. + GEMINIm01L1/scurve/2017.06.07.12.17/stdout/jobOut.txt:Exited with exit code 255. + GEMINIm01L1/scurve/2017.07.18.11.09/stdout/jobOut.txt:Exited with exit code 255. + GEMINIm01L1/scurve/2017.07.18.18.34/stdout/jobOut.txt:Exited with exit code 255. + +For those lines that appear in the grep command output you will need to check +the standard error of the job which can be found in: + +.. code-block:: bash + + <ChamberName>/scurve/<scandate>/stderr/jobErr.txt + +Note since some scans at P5 may have failed to complete successfully some jobs +may intrinsically fail and be non-recoverable. If you have questions about a +particular job you can try to search in the e-log around the scandate in time to +see if anything occurred around this time that might cause problems for the +scan. If you would like to re-analyze a failed job you can do so by calling: + +.. code-block:: bash + + $DATA_PATH/<ChamberName>/scurve/<scandate>/clusterJob.sh + +If a large number of jobs have failed you should spend some time trying to +understand why, and then re-submit to the cluster, rather than attempting to +analyze them all by hand. + +Finally after you are satisfied that all the jobs that could complete +successfully have completed you can: + +#. Re-package the re-analyzed data into a tarball, and/or +#. Create time series plots to summarize the entire dataset. + +For case 1, re-packaging the re-analyzed files into a tarball, execute: + +.. 
code-block:: bash + + packageFiles4Docker.py --ignoreFailedReads --fileListScurve=<ChamberName>/scurve/listOfScanDates.txt --tarBallName=<ChamberName>_scurves_reanalyzed.tar --ztrim=4 + mv <ChamberName>_scurves_reanalyzed.tar $HOME/public + chmod 755 $HOME/public/<ChamberName>_scurves_reanalyzed.tar + echo $HOME/public/<ChamberName>_scurves_reanalyzed.tar + +Then provide the terminal output of this last command to one of the GEM DAQ +Experts for mass-storage. + +For case 2, create time series plots to summarize the entire dataset, execute: + +.. code-block:: bash + + $VIRTUAL_ENV/lib/python2.7/site-packages/gempython/gemplotting/mapping/chamberInfo.py + +And ensure the only uncommented entries of the ``chamber_config`` dictionary +match the set of ChamberName's that you have submitted jobs for. Then execute: + +.. code-block:: bash + + plotTimeSeries.py --startDate=2017.01.01 --anaType=scurve + +Please note the above command may take some time to process depending on the +number of detectors worth of data you are trying to analyze. Then a series of +output ``*.png`` and ``*.root`` files will be found at: + +.. code-block:: bash + + $ELOG_PATH/timeSeriesPlots/<ChamberName>/vt1bump0/ + +If you would prefer to analyze ChamberName's one at a time, or to have an output +``*.png`` file for each VFAT, you can produce time series plots individually by +executing the :program:`gemPlotter.py` commands provided at the end of the +:program:`clusterAnaScurve.py` output. This might be preferred as when analyzing +a large period of time the 3-by-8 grid plots that :program:`plotTimeSeries.py` +will produce for you may be hard to read. In either case +:program:`gemPlotter.py` or :program:`plotTimeSeries.py` will produce a +``TFile`` for you in which the plots at the per VFAT level are stored for you to +later investigate. + +If you encounter issues in this procedure please spend some time trying to +figure out what went wrong on your side first. 
If after studying the documentation +and reviewing the commands you have executed you still do not understand the +failure please ask on the ``Software`` channel of the CMS GEM Ops Mattermost +team or submit an issue to the `github page`_. + +.. _github page: https://github.com/cms-gem-daq-project/gem-plotting-tools/issues/new + +Environment +----------- + +.. glossary:: + + :envvar:`DATA_PATH` + The location of input data + + :envvar:`ELOG_PATH` + Results are written in the directory pointed to by this variable """ if __name__ == '__main__': - """ - Takes a list of scandates file, see parseListOfScanDatesFile(...) documentation, - and launches a job for each (chamberName, scandate) pair. Each job will - launch anaUltraScurve.py - """ - from optparse import OptionParser, OptionGroup parser = OptionParser() parser.add_option("--anaType", type="string", dest="anaType", diff --git a/mapping/chamberInfo.py b/mapping/chamberInfo.py index 51dcfefe..d6403c86 100644 --- a/mapping/chamberInfo.py +++ b/mapping/chamberInfo.py @@ -3,6 +3,7 @@ -------------------------------------------------------- """ +#: Available chambers chamber_config = { #Coffin Setup #0:"GE11-VI-L-CERN-0002"