From 6a0580c6d97bf99531e8fb952170d2bf3d4f10b1 Mon Sep 17 00:00:00 2001
From: Louis Moureaux <lmoureau@ulb.ac.be>
Date: Tue, 3 Jul 2018 15:09:22 +0200
Subject: [PATCH] Add man page for clusterAnaScurve.py

See PR #123
---
 doc/conf.py                  |   2 +
 doc/macros.rst               |   6 +-
 doc/man/clusterAnaScurve.rst |   4 +
 macros/clusterAnaScurve.py   | 270 +++++++++++++++++++++++++++++++++--
 mapping/chamberInfo.py       |   1 +
 5 files changed, 269 insertions(+), 14 deletions(-)
 create mode 100644 doc/man/clusterAnaScurve.rst
diff --git a/doc/conf.py b/doc/conf.py
index 3ce160e2..aa656fe0 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -244,6 +244,8 @@
      authors, 1),
     ('man/packageFiles4Docker', 'packageFiles4Docker.py', u'Creates a tarball containing data',
      authors, 1),
+    ('man/clusterAnaScurve', 'clusterAnaScurve.py', u'Analyze S-curves using the LSF cluster',
+     authors, 1),
 ]
 
 # If true, show URL addresses after external links.
diff --git a/doc/macros.rst b/doc/macros.rst
index e06fda7d..4de7f2a9 100644
--- a/doc/macros.rst
+++ b/doc/macros.rst
@@ -31,14 +31,10 @@ Macros
     :undoc-members:
     :show-inheritance:
 
-.. automodule:: clusterAnaScurve
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 .. toctree::
     :maxdepth: 1
 
+    man/clusterAnaScurve
     man/gemPlotter
 
 .. automodule:: gemSCurveAnaToolkit
diff --git a/doc/man/clusterAnaScurve.rst b/doc/man/clusterAnaScurve.rst
new file mode 100644
index 00000000..a2cebc4b
--- /dev/null
+++ b/doc/man/clusterAnaScurve.rst
@@ -0,0 +1,4 @@
+.. automodule:: clusterAnaScurve
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/macros/clusterAnaScurve.py b/macros/clusterAnaScurve.py
index a2764888..5bc167d5 100755
--- a/macros/clusterAnaScurve.py
+++ b/macros/clusterAnaScurve.py
@@ -1,17 +1,269 @@
 #!/bin/env python
 
-"""
-clusterAnaScurve
-================
+r"""
+``clusterAnaScurve.py`` --- Analyze S-curves using the LSF cluster
+==================================================================
+
+Synopsis
+--------
+
+**clusterAnaScurve.py** :token:`-q` <*QUEUE*> :token:`-t` *long|short* :token:`--anaType` *scurve|trim* (:token:`--chamberName` <*NAME*> | :token:`-i` <*FILE*>)
+
+Description
+-----------
+
+This tool will allow you to re-analyze the scurve data in a straightforward way
+without the time-consuming process of launching it by hand. It takes a file
+listing scandates in the :any:`Two Column Format`, and launches a job for each
+``(chamberName, scandate)`` pair. Each job will launch
+:program:`anaUltraScurve.py`.
+
+Mandatory arguments
+-------------------
+
+The following list shows the mandatory inputs that must be supplied to execute
+the script.
+
+.. program:: clusterAnaScurve.py
+
+.. option:: --anaType scurve|trim
+
+    Analysis type to be executed.
+
+.. option:: --chamberName <CHAMBER NAME>
+
+    Name of detector to be analyzed, from the values in
+    :py:data:`gempython.gemplotting.mapping.chamberInfo.chamber_config`. Either
+    this option or :option:`--infilename` must be supplied.
+
+.. option:: -i, --infilename <FILE>
+
+    Physical file name of the input file. The format of this input file should
+    follow the :any:`Two Column Format`. Either this option or
+    :option:`--chamberName` must be supplied.
+
+.. option:: -q, --queue <QUEUE>
+
+    Queue to submit your jobs to. Suggested options are ``8nm``, ``1nh`` and
+    ``1nd``.
+
+.. option:: -t, --type long|short
+
+    Specify GEB/detector type.
+
+Optional arguments
+------------------
+
+.. option:: --calFile <FILE>
+
+    File specifying CAL_DAC/VCAL to fC equations per VFAT. If this is not
+    provided the analysis will default to hardcoded conversion for VFAT2. See
+    :py:class:`gempython.gemplotting.fitting.fitScanData.ScanDataFitter` for
+    more information.
+
+.. option:: -c, --channels
+
+    Output plots will be made vs VFAT channel instead of ROB strip.
+
+.. option:: -d, --debug
+
+    If provided all cluster files will be created for inspection, and job
+    submission commands printed to terminal, but no jobs will be submitted to
+    the cluster. Using this option before submitting a large number of jobs is
+    strongly recommended.
+
+.. option:: --endDate <YYYY.MM.DD>
+
+    If :option:`--infilename` is not supplied this is the ending scandate, in
+    ``YYYY.MM.DD`` format, to be considered for job submission. The default is
+    whatever ``datetime.today()`` evaluates to.
+
+.. option:: --extChanMapping <FILE>
+
+    Physical file name of a custom, non-default, channel mapping file. If not
+    provided the default slice test ROB strip to VFAT channel mapping will be
+    used.
+
+.. option:: -f, --fit
+
+    Fit S-curves and save fit information to output ``TFile``.
+
+.. option:: -p, --panasonic
+
+    Output plots will be made vs Panasonic pins instead of ROB strip.
+
+.. option:: --startDate <YYYY.MM.DD>
+
+    If :option:`--infilename` is not supplied this is the starting scandate, in
+    ``YYYY.MM.DD`` format, to be considered for job submission. The default is
+    ``2017.01.01`` so the start of the slice test will be used.
+
+.. option:: --zscore <NUMBER>
+
+    Z-Score for Outlier Identification in the MAD Algorithm. For details see
+    talks by `B. Dorney`_ or `L. Moureaux`_.
+
+    .. _B. Dorney: https://indico.cern.ch/event/638404/contributions/2643292/attachments/1483873/2302543/BDorney_OpsMtg_20170627.pdf
+
+    .. _L. Moureaux: https://indico.cern.ch/event/659794/contributions/2691237/attachments/1508531/2351619/UpdateOnHotChannelIdentificationAlgo.pdf
+
+.. option:: --ztrim <NUMBER>
+
+    Specify the :math:`P`-value of the trim in the quantity:
+    ``scurve_mean - ztrim * scurve_sigma``
+
+Finally :program:`clusterAnaScurve.py` can also be passed the cut values used in
+assigning a ``maskReason`` described at
+:any:`Providing Cuts for maskReason at Runtime`.
+
+Full Example For P5 S-Curve Data
+--------------------------------
+
+Before you start, due to space limitations on AFS, it is strongly recommended that
+your :envvar:`DATA_PATH` variable on lxplus point to the work area rather than
+the user area, e.g.:
+
+.. code-block:: bash
+
+    export DATA_PATH=/afs/cern.ch/work/<first-letter-of-your-username>/<your-user-name>/<somepath>
+
+In your work area you can have up to 100GB of space. If this is your first time
+using ``lxplus`` you may want to increase your storage quota by following
+instructions `here`_.
+
+.. _here: https://resources.web.cern.ch/resources/Help/?kbid=067040
+
+Now connect to the P5 ``dqm`` machine. Then if you are interested in a chamber
+``ChamberName`` execute:
+
+.. code-block:: bash
+
+    cd $HOME
+    plotTimeSeries.py --listOfScanDatesOnly --startDate=2017.01.01
+    packageFiles4Docker.py --ignoreFailedReads --fileListScurve=/gemdata/<ChamberName>/scurve/listOfScanDates.txt --tarBallName=<ChamberName>_scurves.tar --ztrim=4 --onlyRawData
+
+Then connect to ``lxplus``, and after setting up the env execute:
+
+.. code-block:: bash
+
+    cd $DATA_PATH
+    scp <your-user-name>@cmsusr.cms:/nfshome0/<your-user-name>/<ChamberName>_scurves.tar .
+    tar -xf <ChamberName>_scurves.tar
+    mv gemdata/<ChamberName> .
+    clusterAnaScurve.py -i <ChamberName>/scurve/listOfScanDates.txt --anaType=scurve -f -q 1nh
+
+It may take some time to finish the job submission. Please pay attention to the
+output at the end of the :program:`clusterAnaScurve.py` command as it provides
+helpful information for managing jobs and understanding what comes next. Once
+your jobs are complete you should check that they all finished successfully. One
+way to do this is to check if any of them exited with status ``Exited`` and
+check for the exit code. To do this execute:
+
+.. code-block:: bash
+
+    grep -R "exit code" <ChamberName>/scurve/*/stdout/jobOut.txt --color
+
+This will print a single line from all files where the string ``exit code`` appears.
+For example::
+
+    GEMINIm01L1/scurve/2017.04.10.20.33/stdout/jobOut.txt:Exited with exit code 255.
+    GEMINIm01L1/scurve/2017.04.26.12.25/stdout/jobOut.txt:Exited with exit code 255.
+    GEMINIm01L1/scurve/2017.04.27.13.27/stdout/jobOut.txt:Exited with exit code 255.
+    GEMINIm01L1/scurve/2017.06.07.12.17/stdout/jobOut.txt:Exited with exit code 255.
+    GEMINIm01L1/scurve/2017.07.18.11.09/stdout/jobOut.txt:Exited with exit code 255.
+    GEMINIm01L1/scurve/2017.07.18.18.34/stdout/jobOut.txt:Exited with exit code 255.
+
+For those lines that appear in the output of the grep command you will need to
+check the standard error of the job, which can be found in:
+
+.. code-block:: bash
+
+    <ChamberName>/scurve/<scandate>/stderr/jobErr.txt
+
+Note that since some scans at P5 may have failed to complete successfully, some
+jobs may intrinsically fail and be non-recoverable. If you have questions about a
+particular job you can try to search in the e-log around the scandate in time to
+see if anything occurred around this time that might cause problems for the
+scan. If you would like to re-analyze a failed job you can do so by calling:
+
+.. code-block:: bash
+
+    $DATA_PATH/<ChamberName>/scurve/<scandate>/clusterJob.sh
+
+If a large number of jobs have failed you should spend some time trying to
+understand why, and then re-submit to the cluster, rather than attempting to
+analyze them all by hand.
+
+Finally after you are satisfied that all the jobs that could complete
+successfully have completed you can:
+
+#. Re-package the re-analyzed data into a tarball, and/or
+#. Create time series plots to summarize the entire dataset.
+
+For case 1, re-packaging the re-analyzed files into a tarball, execute:
+
+.. code-block:: bash
+
+    packageFiles4Docker.py --ignoreFailedReads --fileListScurve=<ChamberName>/scurve/listOfScanDates.txt --tarBallName=<ChamberName>_scurves_reanalyzed.tar --ztrim=4
+    mv <ChamberName>_scurves_reanalyzed.tar $HOME/public
+    chmod 755 $HOME/public/<ChamberName>_scurves_reanalyzed.tar
+    echo $HOME/public/<ChamberName>_scurves_reanalyzed.tar
+
+Then provide the terminal output of this last command to one of the GEM DAQ
+Experts for mass-storage.
+
+For case 2, create time series plots to summarize the entire dataset, execute:
+
+.. code-block:: bash
+
+    <editor of your choice> $VIRTUAL_ENV/lib/python2.7/site-packages/gempython/gemplotting/mapping/chamberInfo.py
+
+And ensure the only uncommented entries of the ``chamber_config`` dictionary
+match the set of ``ChamberName`` values that you have submitted jobs for. Then execute:
+
+.. code-block:: bash
+
+    plotTimeSeries.py --startDate=2017.01.01 --anaType=scurve
+
+Please note the above command may take some time to process depending on the
+number of detectors worth of data you are trying to analyze. Then a series of
+output ``*.png`` and ``*.root`` files will be found at:
+
+.. code-block:: bash
+
+    $ELOG_PATH/timeSeriesPlots/<ChamberName>/vt1bump0/
+
+If you would prefer to analyze each ``ChamberName`` one at a time, or to have an output
+``*.png`` file for each VFAT, you can produce time series plots individually by
+executing the :program:`gemPlotter.py` commands provided at the end of the
+:program:`clusterAnaScurve.py` output. This might be preferred as when analyzing
+a large period of time the 3-by-8 grid plots that :program:`plotTimeSeries.py`
+will produce for you may be hard to read. In either case
+:program:`gemPlotter.py` or :program:`plotTimeSeries.py` will produce a
+``TFile`` for you in which the plots at the per VFAT level are stored for you to
+later investigate.
+
+If you encounter issues in this procedure please spend some time trying to
+figure out what went wrong on your side first. If after studying the documentation
+and reviewing the commands you have executed you still do not understand the
+failure please ask on the ``Software`` channel of the CMS GEM Ops Mattermost
+team or submit an issue to the `github page`_.
+
+.. _github page: https://github.com/cms-gem-daq-project/gem-plotting-tools/issues/new
+
+Environment
+-----------
+
+.. glossary::
+
+    :envvar:`DATA_PATH`
+        The location of input data
+
+    :envvar:`ELOG_PATH`
+        Results are written in the directory pointed to by this variable
 """
 
 if __name__ == '__main__':
-    """
-    Takes a list of scandates file, see parseListOfScanDatesFile(...) documentation,
-    and launches a job for each (chamberName, scandate) pair.  Each job will 
-    launch anaUltraScurve.py
-    """
-    
     from optparse import OptionParser, OptionGroup
     parser = OptionParser()
     parser.add_option("--anaType", type="string", dest="anaType",
diff --git a/mapping/chamberInfo.py b/mapping/chamberInfo.py
index 51dcfefe..d6403c86 100644
--- a/mapping/chamberInfo.py
+++ b/mapping/chamberInfo.py
@@ -3,6 +3,7 @@
 --------------------------------------------------------
 """
 
+#: Available chambers
 chamber_config = {
     #Coffin Setup
     #0:"GE11-VI-L-CERN-0002"