diff --git a/rtd/.readthedocs.yaml b/rtd/.readthedocs.yaml
new file mode 100644
index 000000000..26efcab11
--- /dev/null
+++ b/rtd/.readthedocs.yaml
@@ -0,0 +1,22 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: rtd/docs/source/conf.py
+
+# We recommend specifying your dependencies to enable reproducible builds:
+# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+  install:
+    - requirements: rtd/docs/requirements.txt
diff --git a/rtd/README.rst b/rtd/README.rst
new file mode 100644
index 000000000..80b6a640f
--- /dev/null
+++ b/rtd/README.rst
@@ -0,0 +1,46 @@
+OVIS-HPC LDMS Documentation
+###############################
+
+This repository hosts all LDMS-related documentation, such as how-to tutorials, getting started with LDMS, docker-hub links, APIs and much more. The documentation webpage can be found at the `LDMS readthedocs webpage `_
+
+Contributing to ReadTheDocs
+############################
+Instructions and documentation on how to use ReadTheDocs can be found here:
+`readthedocs Help Guide `_
+
+
+* Clone the repository:
+
+.. code-block:: bash
+
+ > git clone git@github.com:/ovis-docs.git
+
+* Add any existing file name(s) you will be editing to paper.lock
+
+.. code-block:: bash
+
+ > vi paper.lock
+
+ | mm/dd |
+
+* Make the necessary changes, update the paper.lock file and push to the repo.
+
+.. code-block:: bash
+
+ > vi paper.lock
+
+ ## remove line
+ > git add
+ > git commit -m "add message"
+ > git push
+
+Adding A New File
+******************
+For any new RST files created, please include them in ``rtd/docs/source/index.rst`` under their corresponding sections, as shown in the example below. Any RST file not included in index.rst will not appear on the official webpage (e.g. readthedocs).
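+
+For example, a new file ``rtd/docs/source/asf/mynewfile.rst`` (a hypothetical name) would be listed, without its ``.rst`` extension, under the ``toctree`` of its corresponding section:
+
+.. code-block:: RST
+
+   .. toctree::
+      :maxdepth: 2
+
+      asf/mynewfile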
+
+Paper Lock
+************
+This is for claiming any sections you are working on so there is no overlap.
+Please USE paper.lock to indicate if you are editing an existing RST file.
+
+
diff --git a/rtd/docs/make.bat b/rtd/docs/make.bat
new file mode 100644
index 000000000..9534b0181
--- /dev/null
+++ b/rtd/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/rtd/docs/requirements.txt b/rtd/docs/requirements.txt
new file mode 100644
index 000000000..8893b57d6
--- /dev/null
+++ b/rtd/docs/requirements.txt
@@ -0,0 +1,2 @@
+# compatible with the newest version of Sphinx (v7.2.1)
+sphinx_rtd_theme==1.3.0rc1
diff --git a/rtd/docs/source/asf/asf-quickstart.rst b/rtd/docs/source/asf/asf-quickstart.rst
new file mode 100644
index 000000000..4c96fd6e9
--- /dev/null
+++ b/rtd/docs/source/asf/asf-quickstart.rst
@@ -0,0 +1,130 @@
+AppSysFusion Quick Start
+==================================================================
+
+Create A Simple Analysis
+------------------------
+To start, please create a folder called ``graf_analysis`` in your home directory and copy the following contents to a python file called ``dsosTemplate.py``:
+
+* This is a python analysis that queries the DSOS database and returns a DataFrame of the ``meminfo`` schema metrics along with the ``timestamp``, ``component_id`` and ``job_id``.
+
+dsosTemplate.py:
+
+.. code-block :: python
+
+   import os, sys, traceback
+   import datetime as dt
+   from graf_analysis.grafanaAnalysis import Analysis
+   from sosdb import Sos
+   import pandas as pd
+   import numpy as np
+
+   class dsosTemplate(Analysis):
+       def __init__(self, cont, start, end, schema='meminfo', maxDataPoints=4096):
+           super().__init__(cont, start, end, schema, 1000000)
+
+       def get_data(self, metrics, filters=[], params=None):
+           try:
+               sel = f'select {",".join(metrics)} from {self.schema}'
+               where_clause = self.get_where(filters)
+               order = 'time_job_comp'
+               orderby = 'order_by ' + order
+               self.query.select(f'{sel} {where_clause} {orderby}')
+               res = self.get_all_data(self.query)
+               # Fun stuff here!
+               print(res.head())
+               return res
+           except Exception as e:
+               a, b, c = sys.exc_info()
+               print(str(e) + ' ' + str(c.tb_lineno))
+
+.. note::
+
+   If you want to use this analysis module in a Grafana dashboard, you will need to ask your administrator to copy your new analysis module(s) into the directory that Grafana points to. This is because Grafana is set up to query from a specific directory path.
+
+Test Analysis via Terminal Window
+----------------------------------
+You can easily test your module without the Grafana interface by creating a python script that mimics the Grafana query and formats the returned DataFrame into the timeseries or table JSON sent to Grafana.
+
+First, create the following file in the same directory as your python analysis (e.g. ``/user/home/graf_analysis/``) and name it ``testDSOSanalysis.py``.
+
+* This python script imitates the Grafana query that calls your analysis module and will return a timeseries DataFrame of the ``Active`` and ``Inactive`` meminfo metrics.
+
+.. code-block :: python
+
+ #!/usr/bin/python3
+
+ import time,sys
+ from sosdb import Sos
+ from grafanaFormatter import DataFormatter
+ from table_formatter import table_formatter
+ from time_series_formatter import time_series_formatter
+ from dsosTemplate import dsosTemplate
+
+ sess = Sos.Session("//config/dsos.conf")
+ cont = ''
+ cont = sess.open(cont)
+
+ model = dsosTemplate(cont, time.time()-300, time.time(), schema='meminfo', maxDataPoints=4096)
+
+ x = model.get_data(['Active','Inactive'], filters=['job_id'], params='')
+
+ #fmt = table_formatter(x)
+ fmt = time_series_formatter(x)
+ x = fmt.ret_json()
+ print(x)
+
+.. note::
+
+   You will need to provide the path to the DSOS container and the ``Sos.Session()`` configuration file in order to run this python script. Please see `Python Analysis Creation `_ for more details.
+
+* Next, run the test script with the python version currently installed:
+
+.. code-block :: bash
+
+   python3 testDSOSanalysis.py
+
+.. note::
+
+   All imports are python scripts that must reside in the same directory as the test script in order for it to run successfully.
+
+Expected Results & Output
++++++++++++++++++++++++++
+The following is an example test of an analysis module that queries the ``meminfo`` schema and returns a timeseries dataframe of the ``Active`` and ``Inactive`` metrics:
+
+.. image:: ../images/grafana/grafana_output.png
+
+Test Analysis via Grafana Dashboard
+-----------------------------------
+You can optionally test the analysis in a Grafana dashboard. This is not the preferred method because it is a bit more time consuming and, if there is a lot of data to query, there can be some additional wait time as well.
+
+Create A New Dashboard
+++++++++++++++++++++++++++
+To create a new dashboard, click on the + sign on the left side of the home page and select dashboard. This will create a blank dashboard with an empty panel in it. Select the add query button on the panel to begin configuring the query to be sent to an analysis module.
+
+.. note::
+
+ For more information on how to navigate around the Grafana dashboard and what the variables and advanced settings do, please see `Grafana Panel `_ and `Grafana Usage `_.
+
+* Next, add your analysis by filling out the required fields shown below:
+
+.. image:: ../images/grafana/grafana_query.png
+
+* These fields are identical to those in the python test script from your terminal window, so please refer to :ref:`Test Analysis via Terminal Window` or `Grafana Panel `_ for more details.
+
+* Now change the analysis to query over the last 5 minutes by selecting the down arrow in the top right of the panel and selecting "Last 5 minutes".
+
+.. image:: ../images/grafana/grafana_time.png
+ :height: 250
+ :width: 50
+
+* Then change the refresh rate to 5 seconds so that Grafana will automatically query the data every 5 seconds.
+
+.. image:: ../images/grafana/grafana_timerange.png
+
+* Now you should be able to see the "Active" and "Inactive" values for each job_id.
+
+
+
+
diff --git a/rtd/docs/source/asf/asf-tutorial.rst b/rtd/docs/source/asf/asf-tutorial.rst
new file mode 100644
index 000000000..31840b853
--- /dev/null
+++ b/rtd/docs/source/asf/asf-tutorial.rst
@@ -0,0 +1,2 @@
+Additional ASF Tutorial Material
+================================
diff --git a/rtd/docs/source/asf/deployment/index.rst b/rtd/docs/source/asf/deployment/index.rst
new file mode 100644
index 000000000..96d8eebf5
--- /dev/null
+++ b/rtd/docs/source/asf/deployment/index.rst
@@ -0,0 +1,9 @@
+ASF Deployment
+===============
+This section covers how to deploy and test AppSysFusion.
+
+.. toctree::
+ :maxdepth: 2
+
+ test
+
diff --git a/rtd/docs/source/asf/deployment/test.rst b/rtd/docs/source/asf/deployment/test.rst
new file mode 100644
index 000000000..9d51e9b41
--- /dev/null
+++ b/rtd/docs/source/asf/deployment/test.rst
@@ -0,0 +1,4 @@
+Github
+======
+
+Documentation for this is currently under development.
diff --git a/rtd/docs/source/asf/grafanapanel.rst b/rtd/docs/source/asf/grafanapanel.rst
new file mode 100644
index 000000000..fb408debe
--- /dev/null
+++ b/rtd/docs/source/asf/grafanapanel.rst
@@ -0,0 +1,47 @@
+Grafana Panel Creation with DSOS Plugin
+=======================================
+
+To create a new dashboard, click on the + sign on the left side of the Grafana home page and select dashboard.
+This will create a blank dashboard with an empty panel in it. Panels can be thought of as a visualization of a single query. Select the add query button on the panel to begin configuring the query to be sent to an analysis module.
+
+Configuring the Query and Visualization
+---------------------------------------
+.. image:: ../images/grafana/grafanapanel.png
+
+Once you right click on the panel title and select edit, the panel settings will appear. The first tab is for configuring the query. There are 8 fields in the query tab, defined below:
+
+* Query Type - the type of query to perform. The most commonly used is "analysis", which calls an analysis module. "metrics" is used to return raw data without any analysis module.
+* Query Format - the type of visualization to be used on the dataset. It is used by the Grafana Formatter to properly JSON-ify the data returned from the analysis module. Can be either time_series, table, or heatmap.
+* Analysis - required if you choose the analysis query type. Specifies the python module to call to transform the data.
+* Container - the name of the container to be used. This can be either the full path to the container, or the Django backend's get_container function can be changed to customize this for site settings.
+* Schema - the LDMS schema that will be passed into the analysis module
+* Metric - pass a metric, or a comma-separated list (without spaces) of metrics, into the analysis module
+* Extra Params - (Optional) pass an arbitrary string into the analysis module
+* Filters - (Optional) include a no-sql-like syntax for filtering your query; can be a comma-separated list of filters, e.g. ``component_id == 5,job_id > 0``
+
+The second tab in the panel settings is for visualization. Graph, Table, and Heatmap are the available visualizations for a query output.
+
+A Text panel, which uses Markdown, can also be used for dashboard descriptions or details. If you use a graph visualization, the query Format should be time_series. If you use a table visualization, the query Format should be table.
+
+Graphs have multiple draw modes: bars, lines, and points. You can turn any or all of these draw modes on. You can also stack multiple time_series using the stack toggle button.
+
+For more information about how to view the data and configure the panels, please see Grafana's `Panels and Visualization Documentation `_
+
+Dashboard Variables and Advanced Settings
+-------------------------------------------
+.. image:: ../images/grafana/grafanapanel_variables.png
+
+Often we want users to be able to change inputs to the queries; however, users cannot edit the queries themselves. What they can edit in Grafana are variables, which are listed at the top of the dashboard. These variables can be referenced with a ``$`` in front of the variable name. For example, we can let the user switch the SOS container they are interested in by creating a variable called container and then putting ``$container`` in the container field of the query.
+
+To create variables, go to the dashboard settings (gear button at the top right) and go to variables. Here you can create new variables. Common variable types are text boxes, for users to fill in, or queries. We can create a pre-populated list of options for certain fields by querying the container. Below are the queryable fields and what information to put in the query field.
+
+* Container - select the custom option in the **Type** field and add the name of the container being used to query from in the **custom options** field.
+* Schema - ``query=schema&container=``
+* Index - ``query=index&container=&schema=``
+* Metrics - ``query=metrics&container=&schema=``
+* Component IDs - ``query=components&container=&schema=``
+* Jobs - ``query=jobs&container=&schema=``
+
+You can put variables in queries as well. For example, if you already have a ``$container`` variable, you can set the schema variable query to be ``query=schema&container=$container``. Then the ``$schema`` variable can be used in other queries.
+
+In the dashboard settings you can also change the dashboard name and folder location and load previously saved versions.
+
+Other than the container variable, all other variables bulleted above are set to query in the **Type** field.
diff --git a/rtd/docs/source/asf/grafanause.rst b/rtd/docs/source/asf/grafanause.rst
new file mode 100644
index 000000000..8d2eacf36
--- /dev/null
+++ b/rtd/docs/source/asf/grafanause.rst
@@ -0,0 +1,2 @@
+Basic Grafana Usage
+===================
diff --git a/rtd/docs/source/asf/index.rst b/rtd/docs/source/asf/index.rst
new file mode 100644
index 000000000..0b067db4d
--- /dev/null
+++ b/rtd/docs/source/asf/index.rst
@@ -0,0 +1,22 @@
+.. image:: ../images/appsysfusion.png
+ :width: 300
+ :height: 125
+ :align: center
+
+ASF
+====
+AppSysFusion provides analysis and visualization capabilities aimed at serving insights from HPC monitoring data gathered with LDMS, though it could be generalized beyond that scope.
+It combines a Grafana front-end with a Django back-end to perform in-query analyses on raw data and return transformed information back to the end user.
+By performing in-query analyses, only data of interest to the end-user is operated on rather than the entirety of the dataset for all analyses for all time.
+This saves significant computation and storage resources with the penalty of slightly higher query times.
+These analyses are modular python scripts that can be easily added or changed to suit evolving needs.
+The current implementation is aimed at querying DSOS databases containing LDMS data, though efforts are in progress to abstract this functionality out to other databases and datatypes.
+
+.. toctree::
+ :maxdepth: 2
+
+ asf-quickstart
+ asf-tutorial
+ grafanapanel
+ grafanause
+ pyanalysis
diff --git a/rtd/docs/source/asf/pyanalysis.rst b/rtd/docs/source/asf/pyanalysis.rst
new file mode 100644
index 000000000..d14ea5aa8
--- /dev/null
+++ b/rtd/docs/source/asf/pyanalysis.rst
@@ -0,0 +1,299 @@
+Python Analysis Creation
+========================
+
+Analysis I/O
+------------
+An analysis module is a python script that follows a general template. There is a class, which must have the same name as the python script itself, and two class functions: ``__init__`` and ``get_data``. The module is first initialized and then ``get_data`` is called. This should return a pandas DataFrame or a NumSOS DataSet (preferably the former if you are using python3). Below are the variables passed from the Grafana interface to these class functions.
+
+``__init__``
+ * ``cont`` - A Sos.Container object which contains the path information to the SOS container specified in the Grafana query
+ * ``start`` - The beginning of the time range of the Grafana query (in epoch time).
+ * ``end`` - The end of the time range of the Grafana query (in epoch time).
+ * ``schema`` - The LDMS schema specified by the Grafana query (e.g. meminfo).
+ * ``maxDataPoints`` - the maximum number of points that Grafana can display on the user's screen.
+
+``get_data``
+ * ``metrics`` - a python list of metrics specified by the Grafana query (e.g. ['Active','MemFree']).
+ * ``job_id`` - a string of the job_id specified by the Grafana query.
+ * ``user_name`` - a string of the user name specified by the Grafana query.
+ * ``params`` - a string of the extra parameters specified by the Grafana query (e.g. 'threshold = 10').
+ * ``filters`` - a python list of filter strings for the DSOS query (e.g. ['job_id == 30','component_id < 604']).
+Example Analysis Module
+-------------------------------------
+
+Below is a basic analysis that simply queries the database and returns the DataFrame of the metrics passed in along with the timestamp, component_id, and job_id for each metric.
+
+.. code-block :: python
+
+   import os, sys, traceback
+   import datetime as dt
+   from graf_analysis.grafanaAnalysis import Analysis
+   from sosdb import Sos
+   import pandas as pd
+   import numpy as np
+
+   class dsosTemplate(Analysis):
+       def __init__(self, cont, start, end, schema='meminfo', maxDataPoints=4096):
+           super().__init__(cont, start, end, schema, 1000000)
+
+       def get_data(self, metrics, filters=[], params=None):
+           try:
+               sel = f'select {",".join(metrics)} from {self.schema}'
+               where_clause = self.get_where(filters)
+               order = 'time_job_comp'
+               orderby = 'order_by ' + order
+               self.query.select(f'{sel} {where_clause} {orderby}')
+               res = self.get_all_data(self.query)
+               # Fun stuff here!
+               print(res.head())
+               return res
+           except Exception as e:
+               a, b, c = sys.exc_info()
+               print(str(e) + ' ' + str(c.tb_lineno))
+
+In the ``__init__`` function, most arguments are stored as self variables by the ``super()`` call so that they can be accessed later in ``get_data``. The ``super()`` call also sets up a variable called ``self.query``, which is a ``Sos.SqlQuery`` object. The 1000000 in the ``super()`` call sets the block size for this ``self.query`` object. An optimal block size depends on the query; however, 1 million has been sufficiently performant to this point.
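+
+For instance, a module that wanted a smaller query block size would only change the last argument of the ``super()`` call (a hypothetical variant of the template above):
+
+.. code-block :: python
+
+   from graf_analysis.grafanaAnalysis import Analysis
+
+   class smallBlockTemplate(Analysis):
+       def __init__(self, cont, start, end, schema='meminfo', maxDataPoints=4096):
+           # same as dsosTemplate, but with a 10,000-record query block size
+           super().__init__(cont, start, end, schema, 10000)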
+
+In the ``get_data`` function we create a select clause for the DSOS query by joining the metrics and schema variables. ``self.get_where`` is a graf_analysis class function which takes the filter parameters and builds an SQL-like where clause string with ``self.start`` and ``self.end`` as timestamp boundaries. There is also the orderby variable, which we set to ``time_job_comp`` here. This refers to the index to use when querying the database. Our SOS databases are set up to use permutations of ``timestamp``, ``job ID``, and ``component ID`` as multi-indices. Depending on your filter, you may want to use a different multi-index.
+
+``self.get_all_data`` takes the ``Sos.SqlQuery`` object, ``self.query``, and calls ``self.query.next``. This returns a block-size number of records that match the query from the database defined by the ``cont`` variable. If there are more than a block-size number of records, it continues calling ``self.query.next`` and appending the results to a pandas DataFrame until all data is returned.
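+
+As a rough illustration of that loop (a sketch of the behavior described above, not the actual graf_analysis implementation; it assumes ``query.next()`` returns a DataFrame-like block, or ``None`` when the query is drained):
+
+.. code-block :: python
+
+   import pandas as pd
+
+   def get_all_data(query):
+       # keep fetching block-size chunks until the query is exhausted
+       frames = []
+       block = query.next()
+       while block is not None:
+           frames.append(pd.DataFrame(block))
+           block = query.next()
+       return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()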
+
+Additional analysis can be added where the "Fun stuff here!" comment is.
+
+With the example parameters specified in the last section, our select statement here would be ``select Active,MemFree from meminfo where timestamp > start and timestamp < end and job_id == 30 and component_id < 604 order_by time_job_comp``. A sketch reproducing this statement follows the note below.
+
+.. note::
+
+ ``job_id`` and ``user_name`` must exist in the schema passed in for this command to work.
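+
+As a quick, self-contained illustration, the statement above can be reproduced with plain string building (placeholder epoch timestamps stand in for ``self.start``/``self.end``; in the real module the clause comes from ``self.get_where``):
+
+.. code-block :: python
+
+   start, end = 1700000000, 1700000300   # placeholder epoch seconds
+   filters = ['job_id == 30', 'component_id < 604']
+   sel = 'select Active,MemFree from meminfo'
+   where_clause = 'where ' + ' and '.join(
+       [f'timestamp > {start}', f'timestamp < {end}'] + filters)
+   print(f'{sel} {where_clause} order_by time_job_comp')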
+
+Testing an Analysis Module
+--------------------------
+This section goes over how to test your python analysis module as a user.
+
+You do not need to query from the Grafana interface to test your module. Below is a simple script which mimics the Grafana pipeline and prints the JSON returned to Grafana.
+
+.. note::
+
+   **If Grafana and SOS are already installed on your system then please skip the `Required Scripts`_ section** and ask your system administrator where these scripts reside on the system so that you may copy all necessary python scripts and modules to your home directory, edit/modify existing python analysis modules and create new ones.
+
+
+First, set the ``PYTHONPATH`` and ``PATH`` environment variables so that the OVIS/SOS python modules and binaries can be found:
+
+.. code-block :: bash
+
+ export PYTHONPATH=/usr/bin/python://lib/python/site-packages/
+ export PATH=/usr/bin://bin://sbin::$PATH
+
+Then you can imitate the Grafana query to call your analysis module using a python script such as:
+
+.. code-block :: python
+
+ #!/usr/bin/python3
+
+ import time,sys
+ from sosdb import Sos
+ from grafanaFormatter import DataFormatter
+ from table_formatter import table_formatter
+ from time_series_formatter import time_series_formatter
+ from dsosTemplate import dsosTemplate
+
+ sess = Sos.Session("//config/dsos.conf")
+ cont = ''
+ cont = sess.open(cont)
+
+ model = dsosTemplate(cont, time.time()-300, time.time(), schema='meminfo', maxDataPoints=4096)
+
+ x = model.get_data(['Active'])
+
+ #fmt = table_formatter(x)
+ fmt = time_series_formatter(x)
+ x = fmt.ret_json()
+ print(x)
+
+* The ``model.get_data`` call is where you define which metrics to collect (in this case "Active") and what filters and extra parameters to add to your query. The syntax is as follows: ``([''], filters=['job_id>0'], params='')``
+
+* If you would like to query all metrics then replace ``Active`` with ``*``.
+* To query a specific job_id: set ``job_id`` to your job_id with ``==``.
+* To query from a specific time range: update the start time, ``time.time()-300``, and end time, ``time.time()``, to epoch timestamps.
+* To add a string metric, filter or parameter, you must include a double quote, ``"``, before and after the string (e.g. ``filters=['user=="myusername"']``). A few example calls are sketched below.
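+
+A few hedged example calls (hypothetical values, using the ``model`` object from the script above):
+
+.. code-block :: python
+
+   x = model.get_data(['*'])                                       # all metrics in the schema
+   x = model.get_data(['Active'], filters=['job_id==42'])          # one job (42 is a placeholder)
+   x = model.get_data(['Active'], filters=['user=="myusername"'])  # string values need double quotes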
+
+.. note::
+
+   The ``params`` can be any number or string that you want to define in your analysis module to better manage, output or analyze the data. For example, you can program your module to return specific analyses such as the average with ``params='analysis=average'`` by parsing the argument, using ``if`` statements to determine what analysis to apply to the data and, to make things cleaner, a function to perform these calculations in.
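+
+Below is a minimal, self-contained sketch of that idea (the ``analysis=average`` handling and the ``apply_params`` helper are illustrative assumptions, not part of the shipped modules):
+
+.. code-block :: python
+
+   import pandas as pd
+
+   def apply_params(res, metrics, params=None):
+       '''Apply an optional "analysis=<name>" directive parsed from params.'''
+       if not params:
+           return res
+       # split comma-separated "key=value" pairs, e.g. "analysis=average"
+       opts = dict(kv.partition('=')[::2] for kv in params.split(','))
+       if opts.get('analysis') == 'average':
+           # return the per-metric mean over the queried time range
+           return res[metrics].mean().to_frame().T
+       return res
+
+   df = pd.DataFrame({'Active': [1.0, 3.0], 'Inactive': [2.0, 4.0]})
+   print(apply_params(df, ['Active'], params='analysis=average'))
+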
+Required Scripts
+////////////////
+The following scripts are needed to run the python analysis module. If these python scripts or modules **do not exist on your system and you have no way of accessing them**, then please continue. Otherwise, you can skip this section.
+
+**If you do not have access to these existing scripts** then please create them in the same directory as your python analysis module.
+
+.. note::
+
+   If Grafana and SOS are installed on your system then please ask your system administrator where these files reside on the system so that you can copy them to your home directory.
+
+grafanaFormatter:
+
+.. code:: python
+
+   from sosdb import Sos
+   from sosdb.DataSet import DataSet
+   import numpy as np
+   import pandas as pd
+   import copy
+
+   class RowIter(object):
+       def __init__(self, dataSet):
+           self.dset = dataSet
+           self.limit = dataSet.get_series_size()
+           self.row_no = 0
+
+       def __iter__(self):
+           return self
+
+       def cvt(self, value):
+           if type(value) == np.datetime64:
+               return [ value.astype(np.int64) / 1000 ]
+           return value
+
+       def __next__(self):
+           if self.row_no >= self.limit:
+               raise StopIteration
+           res = [ self.cvt(self.dset[[col, self.row_no]]) for col in range(0, self.dset.series_count) ]
+           self.row_no += 1
+           return res
+
+   class DataFormatter(object):
+       def __init__(self, data):
+           self.result = []
+           self.data = data
+           self.fmt = type(self.data).__module__
+           self.fmt_data = {
+               'sosdb.DataSet' : self.fmt_dataset,
+               'pandas.core.frame' : self.fmt_dataframe,
+               'builtins' : self.fmt_builtins
+           }
+
+       def ret_json(self):
+           return self.fmt_data[self.fmt]()
+
+       # the fmt_* stubs below are overridden by the concrete formatters
+       def fmt_dataset(self):
+           pass
+
+       def fmt_dataframe(self):
+           pass
+
+       def fmt_builtins(self):
+           pass
+
+table_formatter:
+
+.. code:: python
+
+   from graf_analysis.grafanaFormatter import DataFormatter, RowIter
+   from sosdb.DataSet import DataSet
+   from sosdb import Sos
+   import numpy as np
+   import pandas as pd
+   import copy
+
+   class table_formatter(DataFormatter):
+       def fmt_dataset(self):
+           # Format data from sosdb DataSet object
+           if self.data is None:
+               return {"columns" : [{ "text" : "No papi jobs in time range" }] }
+
+           self.result = { "type" : "table" }
+           self.result["columns"] = [ { "text" : colName } for colName in self.data.series ]
+           rows = []
+           for row in RowIter(self.data):
+               rows.append(row)
+           self.result["rows"] = rows
+           return self.result
+
+       def fmt_dataframe(self):
+           if self.data is None:
+               return {"columns" : [{ "text" : "No papi jobs in time range" }] }
+
+           self.result = { "type" : "table" }
+           self.result["columns"] = [ { "text" : colName } for colName in self.data.columns ]
+           self.result["rows"] = self.data.to_numpy()
+           return self.result
+
+       def fmt_builtins(self):
+           if self.data is None:
+               return { "columns" : [], "rows" : [], "type" : "table" }
+           else:
+               return self.data
+
+time_series_formatter:
+
+.. code:: python
+
+   from graf_analysis.grafanaFormatter import DataFormatter
+   from sosdb.DataSet import DataSet
+   from sosdb import Sos
+   import numpy as np
+   import pandas as pd
+   import copy
+
+   class time_series_formatter(DataFormatter):
+       def fmt_dataset(self):
+           # timestamp is always last series
+           if self.data is None:
+               return [ { "target" : "", "datapoints" : [] } ]
+
+           for series in self.data.series:
+               if series == 'timestamp':
+                   continue
+               ds = DataSet()
+               ds.append_series(self.data, series_list=[series, 'timestamp'])
+               plt_dict = { "target" : series }
+               plt_dict['datapoints'] = ds.tolist()
+               self.result.append(plt_dict)
+               del ds
+           return self.result
+
+       def fmt_dataframe(self):
+           if self.data is None:
+               return [ { "target" : "", "datapoints" : [] } ]
+
+           for series in self.data.columns:
+               if series == 'timestamp':
+                   continue
+               plt_dict = { "target" : series }
+               plt_dict['datapoints'] = self.fmt_datapoints([series, 'timestamp'])
+               self.result.append(plt_dict)
+           return self.result
+
+       def fmt_datapoints(self, series):
+           ''' Format dataframe to output expected by grafana '''
+           aSet = []
+           for row_no in range(0, len(self.data)):
+               aRow = []
+               for col in series:
+                   v = self.data[col].values[row_no]
+                   typ = type(v)
+                   if typ.__module__ == 'builtins':
+                       pass
+                   elif typ == np.ndarray or typ == np.string_ or typ == np.str_:
+                       v = str(v)
+                   elif typ == np.float32 or typ == np.float64:
+                       v = float(v)
+                   elif typ == np.int64 or typ == np.uint64:
+                       v = int(v)
+                   elif typ == np.int32 or typ == np.uint32:
+                       v = int(v)
+                   elif typ == np.int16 or typ == np.uint16:
+                       v = int(v)
+                   elif typ == np.datetime64:
+                       # convert to milliseconds from microseconds
+                       v = v.astype(np.int64) / int(1e6)
+                   else:
+                       raise ValueError("Unrecognized numpy type {0}".format(typ))
+                   aRow.append(v)
+               aSet.append(aRow)
+           return aSet
+
+       def fmt_builtins(self):
+           if self.data is None:
+               return [ { "target" : "", "datapoints" : [] } ]
+           else:
+               return self.data
+
diff --git a/rtd/docs/source/conf.py b/rtd/docs/source/conf.py
new file mode 100644
index 000000000..7de1dbac6
--- /dev/null
+++ b/rtd/docs/source/conf.py
@@ -0,0 +1,50 @@
+# Configuration file for the Sphinx documentation builder.
+
+# -- Project information
+
+project = 'OVIS-HPC'
+copyright = '2024, Sandia National Laboratories and Open Grid Computing, Inc.'
+author = 'SNL/OGC'
+
+release = '0.1'
+version = '0.1.0'
+
+# -- General configuration
+
+extensions = [
+ 'sphinx.ext.duration',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.intersphinx',
+]
+
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3/', None),
+ 'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+
+ # Link to the "apis" of the "hpc-ovis" project and subprojects
+ "ovis-hpc": ("https://ovis-hpc.readthedocs.io/en/latest/", None),
+ "sos": ("https://ovis-hpc.readthedocs.io/projects/sos/en/latest/", None),
+ "maestro": ("https://ovis-hpc.readthedocs.io/projects/maestro/en/latest/", None),
+ "baler": ("https://ovis-hpc.readthedocs.io/projects/baler/en/latest/", None),
+ "ldms": ("https://ovis-hpc.readthedocs.io/projects/ldms/en/latest/", None),
+
+}
+intersphinx_disabled_domains = ['std']
+intersphinx_disabled_reftypes = ["*"]
+
+templates_path = ['_templates']
+
+# -- Options for HTML output
+
+html_theme = 'sphinx_rtd_theme'
+html_static_path = ['static']
+html_logo = "https://github.com/ovis-hpc/readthedocs/blob/main/docs/source/images/ovis-logo.png?raw=true"
+html_theme_options = {
+ 'logo_only': True,
+ 'display_version': False,
+}
+
+# -- Options for EPUB output
+epub_show_urls = 'footnote'
diff --git a/rtd/docs/source/container-quickstart.rst b/rtd/docs/source/container-quickstart.rst
new file mode 100644
index 000000000..c03b5ba4f
--- /dev/null
+++ b/rtd/docs/source/container-quickstart.rst
@@ -0,0 +1,931 @@
+LDMS Containers
+===============
+
+The ``ovis-hpc/ldms-containers`` git repository contains recipes and scripts
+for building Docker Images of various components in LDMS, namely:
+
+- ``ovishpc/ldms-dev``: an image containing dependencies for building
+ OVIS binaries and developing LDMS plugins.
+- ``ovishpc/ldms-samp``: an image containing ``ldmsd`` binary and
+ sampler plugins.
+- ``ovishpc/ldms-agg``: an image containing ``ldmsd`` binary, sampler
+ plugins, and storage plugins (including SOS).
+- ``ovishpc/ldms-maestro``: an image containing ``maestro`` and
+ ``etcd``.
+- ``ovishpc/ldms-ui``: an image containing UI back-end elements,
+ providing LDMS data access over HTTP (``uwsgi`` + ``django`` +
+ `ovis-hpc/numsos `__ +
+ `ovis-hpc/sosdb-ui `__ +
+ `ovis-hpc/sosdb-grafana `__)
+- ``ovishpc/ldms-grafana``: an image containing ``grafana`` and the SOS
+ data source plugin for grafana
+ (`sosds `__)
+
+Table of Contents:
+
+- `Brief Overview About Docker
+ Containers <#brief-overview-about-docker-containers>`__
+- `Sites WITHOUT internet access <#sites-without-internet-access>`__
+- `SYNOPSIS <#SYNOPSIS>`__
+- `EXAMPLES <#EXAMPLES>`__
+- `LDMS Sampler Container <#ldms-sampler-container>`__
+- `LDMS Aggregator Container <#ldms-aggregator-container>`__
+- `Maestro Container <#maestro-container>`__
+- `LDMS UI Back-End Container <#ldms-ui-back-end-container>`__
+- `LDMS-Grafana Container <#ldms-grafana-container>`__
+- `SSH port forwarding to grafana <#ssh-port-forwarding-to-grafana>`__
+- `Building Containers <#building-containers>`__
+
+Brief Overview About Docker Containers
+--------------------------------------
+
+A docker container is a runnable instance of an image. In Linux, it is
+implemented using namespaces
+(`namespaces(7) `__).
+The ``docker create`` command creates a container that can later be started
+with ``docker start``, while ``docker run`` creates and starts the
+container in one go. When a container starts, the first process being
+run, or the root process, is the program specified by the ``--entrypoint``
+CLI option or ``ENTRYPOINT`` Dockerfile directive. When the root process
+exits or is killed, the container status becomes ``exited``.
+The ``docker stop`` command sends ``SIGTERM`` to the root process, and the
+``docker kill`` command sends ``SIGKILL`` to the root process. The other
+processes in the container are also terminated or killed when the root
+process is terminated or killed. ``docker ps`` shows "running"
+containers, while ``docker ps -a`` shows ALL containers (including the
+exited ones).
+
+When a container is created (before started), its mount namespace
+(`mount_namespaces(7) `__)
+is prepared by the Docker engine. This isolates the container's filesystem
+from the host. The Docker Image is the basis of the filesystem mounted
+in the container. The image itself is read-only, and the modification to
+the files/directories inside the container at runtime is done on the
+writable layer on top of the image. They are "unified" and presented to
+the container as a single filesystem by OverlayFS (most preferred by
+Docker, but other drivers like ``btrfs`` could also be used). A Docker
+Image is actually a collection of "layers" of root directories (``/``).
+When a container is ``stopped`` (the root process exited/killed), the
+writable top layer still persists until ``docker rm`` command removes
+the container.
+
+The network namespace
+(`network_namespace `__)
+and the process namespace (`process
+namespace `__)
+of a container are normally isolated, but could also use host's
+namespaces. The LDMS sampler containers (``ovishpc/ldms-samp``) require
+host process namespace (``--pid=host`` option) so that the ``ldmsd``
+reads host's ``/proc`` data. Otherwise, we will be collecting
+container's metric data. Other LDMS containers do not need host process
+namespace. For the network namespace, it is advisable to use host's
+network namespace (``--network=host``) to fully utilize RDMA hardware on
+the host with minimal effort in network configuration.
+
+Sites WITHOUT internet access
+-----------------------------
+
+#. On your laptop (or a machine that HAS the Internet access)
+
+.. code:: sh
+
+ $ docker pull ovishpc/ldms-dev
+ $ docker pull ovishpc/ldms-samp
+ $ docker pull ovishpc/ldms-agg
+ $ docker pull ovishpc/ldms-maestro
+ $ docker pull ovishpc/ldms-ui
+ $ docker pull ovishpc/ldms-grafana
+
+ $ docker save ovishpc/ldms-dev > ovishpc-ldms-dev.tar
+ $ docker save ovishpc/ldms-samp > ovishpc-ldms-samp.tar
+ $ docker save ovishpc/ldms-agg > ovishpc-ldms-agg.tar
+ $ docker save ovishpc/ldms-maestro > ovishpc-ldms-maestro.tar
+ $ docker save ovishpc/ldms-ui > ovishpc-ldms-ui.tar
+ $ docker save ovishpc/ldms-grafana > ovishpc-ldms-grafana.tar
+
+ # Then, copy these tar files to the site
+
+#. On the site that has NO Internet access
+
+.. code:: sh
+
+ $ docker load < ovishpc-ldms-dev.tar
+ $ docker load < ovishpc-ldms-samp.tar
+ $ docker load < ovishpc-ldms-agg.tar
+ $ docker load < ovishpc-ldms-maestro.tar
+ $ docker load < ovishpc-ldms-ui.tar
+ $ docker load < ovishpc-ldms-grafana.tar
+
+Then, the images are available locally (no need to ``docker pull``).
+
+SYNOPSIS
+--------
+
+In this section, the options in ``[ ]`` are optional. Please see the
+``#`` comments right after the options for the descriptions. Please also
+note that the options BEFORE the Docker Image name are for
+``docker run``, and the options AFTER the image name are for the
+entrypoint script. The following is the information regarding entrypoint
+options for each image:
+
+- ``ovishpc/ldms-dev`` entrypoint options are pass-through to
+ ``/bin/bash``.
+- ``ovishpc/ldms-samp`` entrypoint options are pass-through to ldmsd.
+- ``ovishpc/ldms-agg`` entrypoint options are pass-through to ldmsd.
+- ``ovishpc/ldms-maestro`` entrypoint options are ignored.
+- ``ovishpc/ldms-ui`` entrypoint options are pass-through to uwsgi.
+- ``ovishpc/ldms-grafana`` entrypoint options are pass-through to
+ grafana-server program.
+
+.. code:: sh
+
+ # Pulling images
+ $ docker pull ovishpc/ldms-dev
+ $ docker pull ovishpc/ldms-samp
+ $ docker pull ovishpc/ldms-agg
+ $ docker pull ovishpc/ldms-maestro
+ $ docker pull ovishpc/ldms-ui
+ $ docker pull ovishpc/ldms-grafana
+
+ # munge remark: munge.key file must be owned by 101:101 (which is munge:munge in
+ # the container) and have 0600 mode.
+
+ # ovishpc/ldms-maestro
+ $ docker run -d --name= --network=host --privileged
+ [ -v /run/munge:/run/munge:ro ] # expose host's munge to the container
+ [ -v /on-host/munge.key:/etc/munge/munge.key:ro ] # use container's munged with custom key
+ -v /on-host/ldms_cfg.yaml:/etc/ldms_cfg.yaml:ro # bind ldms_cfg.yaml, used by maestro_ctrl
+ ovishpc/ldms-maestro # the image name
+
+
+ # ovishpc/ldms-samp
+ $ docker run -d --name= --network=host --pid=host --privileged
+ -e COMPID= # set COMPID environment variable
+ [ -v /run/munge:/run/munge:ro ] # expose host's munge to the container
+ [ -v /on-host/munge.key:/etc/munge/munge.key:ro ] # use container's munged with custom key
+ ovishpc/ldms-samp # the image name
+ -x : # transport, listening port
+ [ -a munge ] # use munge authentication
+ [ OTHER LDMSD OPTIONS ]
+
+
+ # ovishpc/ldms-agg
+ $ docker run -d --name= --network=host --pid=host --privileged
+ -e COMPID= # set COMPID environment variable
+ [ -v /on-host/storage:/storage:rw ] # bind 'storage/'. Could be any path, depending on ldmsd configuration
+ [ -v /on-host/dsosd.json:/etc/dsosd.json:ro ] # bind dsosd.json configuration, if using dsosd to export SOS data
+ [ -v /run/munge:/run/munge:ro ] # expose host's munge to the container
+ [ -v /on-host/munge.key:/etc/munge/munge.key:ro ] # use container's munged with custom key
+ ovishpc/ldms-agg # the image name
+ -x : # transport, listening port
+ [ -a munge ] # use munge authentication
+ [ OTHER LDMSD OPTIONS ]
+ # Run dsosd to export SOS data
+ $ docker exec -it /bin/bash
+ () $ rpcbind
+ () $ export DSOSD_DIRECTORY=/etc/dsosd.json
+ () $ dsosd >/var/log/dsosd.log 2>&1 &
+ () $ exit
+
+
+ # ovishpc/ldms-ui
+ $ docker run -d --name= --network=host --privileged
+ -v /on-host/dsosd.conf:/opt/ovis/etc/dsosd.conf # dsosd.conf file, required to connect to dsosd
+ -v /on-host/settings.py:/opt/ovis/ui/sosgui/settings.py # sosdb-ui Django setting file
+ ovishpc/ldms-ui # the image name
+ [ --http-socket=: ] # addr:port to serve, ":80" by default
+ [ OTHER uWSGI OPTIONS ]
+
+
+ # ovishpc/ldms-grafana
+ $ docker run -d --name= --network=host --privileged
+ [ -v /on-host/grafana.ini:/etc/grafana/grafana.ini:ro ] # custom grafana config
+ [ -e GF_SERVER_HTTP_ADDR= ] # env var to override Grafana IP address binding (default: all addresses)
+ [ -e GF_SERVER_HTTP_PORT= ] # env var to override Grafana port binding (default: 3000)
+ ovishpc/ldms-grafana # the image name
+ [ OTHER GRAFANA-SERVER OPTIONS ] # other options to grafana-server
+
+
+ # -------------------------------------
+ # configuration files summary
+ # -------------------------------------
+ # - /on-host/dsosd.json: contains dictionary mapping hostname - container
+ # location in the host, e.g.
+ # {
+ # "host1": {
+ # "dsos_cont":"/storage/cont_host1"
+ # },
+ # "host2": {
+ # "dsos_cont":"/storage/cont_host2"
+ # }
+ # }
+ #
+ # - /on-host/dsosd.conf: contains host names (one per line) of the dsosd, e.g.
+ # host1
+ # host2
+ #
+ # - /on-host/settings.py: Django settings. Pay attention to DSOS_ROOT and
+ # DSOS_CONF variables.
+
+EXAMPLES
+--------
+
+In this example, we have an 8-node cluster with host names cygnus-01 to
+cygnus-08. ``cygnus-0[1-4]`` are used as compute nodes (deploying
+``ovishpc/ldms-samp`` containers). ``cygnus-0[5-6]`` are used as L1
+aggregators (``ovishpc/ldms-agg`` containers without storage).
+``cygnus-07`` is used as L2 aggregator with a DSOS storage
+(``ovishpc/ldms-agg`` with dsosd). ``cygnus-07`` will also host
+``ovishpc/maestro``, ``ovishpc/ldms-ui`` and ``ovishpc/ldms-grafana``
+containers. We will be running commands from ``cygnus-07``. The cluster
+has ``munged`` pre-configured and running on all nodes with the same
+key.
+
+Configuration files used in this example are listed at the end of the
+section. The following is a list of commands that deploys various
+containers on the cygnus cluster:
+
+.. code:: sh
+
+ # Start sampler containers on cygnus-01,02,03,04
+ root@cygnus-07 $ pdsh -w cygnus-0[1-4] 'docker run -d --name=samp --network=host --pid=host --privileged -v /run/munge:/run/munge:ro -e COMPONENT_ID=${HOSTNAME#cygnus-0} ovishpc/ldms-samp -x rdma:411 -a munge'
+ # Notice the COMPONENT_ID environment variable setup using Bash substitution.
+ # The COMPONENT_ID environment variable is later used in LDMSD sampler plugin
+ # configuration `component_id: ${COMPONENT_ID}` in the `ldms_cfg.yaml` file.
+
+ # Start L1 aggregator containers on cygnus-05,06
+ root@cygnus-07 $ pdsh -w cygnus-0[5-6] docker run -d --name=agg1 --network=host --pid=host --privileged -v /run/munge:/run/munge:ro ovishpc/ldms-agg -x rdma:411 -a munge
+
+ # Start L2 aggregator container on cygnus-07
+ root@cygnus-07 $ docker run -d --name=agg2 --network=host --pid=host --privileged -v /run/munge:/run/munge:ro -v /store:/store:rw ovishpc/ldms-agg -x rdma:411 -a munge
+
+ # Start dsosd in the `agg2`, our L2 aggregator container
+ root@cygnus-07 $ echo 'rpcbind ; dsosd > /var/log/dsosd.log 2>&1 &' | docker exec -i agg2 /bin/bash
+
+ # Start maestro container on cygnus-07
+ root@cygnus-07 $ docker run -d --name=maestro --network=host --privileged -v /run/munge:/run/munge:ro -v ${PWD}/ldms_cfg.yaml:/etc/ldms_cfg.yaml:ro ovishpc/ldms-maestro
+
+ # Start Django UI container
+ root@cygnus-07 $ docker run -d --name=ui --network=host --privileged -v ${PWD}/dsosd.conf:/opt/ovis/etc/dsosd.conf -v ${PWD}/settings.py:/opt/ovis/ui/sosgui/settings.py ovishpc/ldms-ui
+
+ # Start Grafana container
+ root@cygnus-07 $ docker run -d --name=grafana --privileged --network=host ovishpc/ldms-grafana
+
+Related configuration files
+
+.. code:: sh
+
+ # dsosd.conf
+ cygnus-07
+
+.. code:: yaml
+
+ # ldms_cfg.yaml
+ xprt: &xprt "rdma"
+ daemons:
+ - names : &samp-names "samp-[1-4]"
+ hosts : &samp-hosts "cygnus-0[1-4]-iw"
+ endpoints :
+ - names : &samp-eps "cygnus-0[1-4]-iw-ep"
+ ports : 411
+ xprt : *xprt
+ maestro_comm : True
+ auth :
+ name : munge
+ plugin : munge
+ - names : &L1-names "agg-[11-12]"
+ hosts : &L1-hosts "cygnus-0[5-6]-iw"
+ endpoints :
+ - names : &L1-eps "agg-[11-12]-ep"
+ ports : 411
+ xprt : *xprt
+ maestro_comm : True
+ auth :
+ name : munge
+ plugin : munge
+ - names : &L2-name "agg-2"
+ hosts : &L2-host "cygnus-07-iw"
+ endpoints :
+ - names : &L2-ep "agg-2-ep"
+ ports : 411
+ xprt : *xprt
+ maestro_comm : True
+ auth :
+ name : munge
+ plugin : munge
+
+ aggregators:
+ - daemons : *L1-names
+ peers :
+ - daemons : *samp-names
+ endpoints : *samp-eps
+ reconnect : 1s
+ type : active
+ updaters :
+ - mode : pull
+ interval : "1.0s"
+ offset : "200ms"
+ sets :
+ - regex : .*
+ field : inst
+ - daemons : *L2-name
+ peers:
+ - daemons : *L1-names
+ endpoints : *L1-eps
+ reconnect : 1s
+ type : active
+ updaters :
+ - mode : pull
+ interval : "1.0s"
+ offset : "400ms"
+ sets :
+ - regex : .*
+ field : inst
+
+ samplers:
+ - daemons : *samp-names
+ plugins :
+ - name : meminfo # Variables can be specific to plugin
+ interval : "1s" # Used when starting the sampler plugin
+ offset : "0s"
+ config : &simple_samp_config
+ component_id : "${COMPONENT_ID}"
+ perm : "0777"
+
+ stores:
+ - name : sos-meminfo
+ daemons : *L2-name
+ container : meminfo
+ schema : meminfo
+ flush : 10s
+ plugin :
+ name : store_sos
+ config :
+ path : /store
+
+.. code:: py
+
+ # settings.py
+ """
+ Django settings for sosgui project.
+
+ Generated by 'django-admin startproject' using Django 1.8.2.
+
+ For more information on this file, see
+ https://docs.djangoproject.com/en/1.8/topics/settings/
+
+ For the full list of settings and their values, see
+ https://docs.djangoproject.com/en/1.8/ref/settings/
+ """
+
+ # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+ import os
+ import json
+
+ log = open('/var/log/sosgui/settings.log', 'a')
+ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+ # Quick-start development settings - unsuitable for production
+ # See https://docs.djangoproject.com/en/1.8/howto/deployment/checklist/
+
+ # SECURITY WARNING: keep the secret key used in production secret!
+ SECRET_KEY = 'blablablablablablablablablablablablablablablablablabla'
+
+ # SECURITY WARNING: don't run with debug turned on in production!
+ DEBUG = True
+
+ ALLOWED_HOSTS = [
+ '*',
+ ]
+
+ APPEND_SLASH = False
+
+ STATIC_ROOT = os.path.join(BASE_DIR, "assets")
+
+ AUTH_USER_MODEL = 'sosdb_auth.SosdbUser'
+
+ # Application definition
+
+ INSTALLED_APPS = (
+ 'corsheaders',
+ 'django.contrib.admin',
+ 'django.contrib.auth',
+ 'django.contrib.contenttypes',
+ 'django.contrib.sessions',
+ 'django.contrib.messages',
+ 'django.contrib.staticfiles',
+ 'container',
+ 'jobs',
+ 'objbrowser',
+ 'sos_db',
+ 'sosdb_auth',
+ )
+
+ try:
+ from . import ldms_settings
+ INSTALLED_APPS = INSTALLED_APPS + ldms_settings.INSTALLED_APPS
+ except:
+ pass
+
+ try:
+ from . import grafana_settings
+ INSTALLED_APPS = INSTALLED_APPS + grafana_settings.INSTALLED_APPS
+ except:
+ pass
+
+ try:
+ from . import baler_settings
+ INSTALLED_APPS = INSTALLED_APPS + baler_settings.INSTALLED_APPS
+ except:
+ pass
+
+ MIDDLEWARE = (
+ 'corsheaders.middleware.CorsMiddleware',
+ 'django.contrib.sessions.middleware.SessionMiddleware',
+ 'django.middleware.common.CommonMiddleware',
+ 'django.contrib.auth.middleware.AuthenticationMiddleware',
+ 'django.contrib.messages.middleware.MessageMiddleware',
+ 'django.middleware.clickjacking.XFrameOptionsMiddleware',
+ 'django.middleware.security.SecurityMiddleware',
+ )
+
+ ROOT_URLCONF = 'sosgui.urls'
+
+ TEMPLATES = [
+ {
+ 'BACKEND': 'django.template.backends.django.DjangoTemplates',
+ 'DIRS': [
+ '/opt/ovis/ui/templates',
+ ],
+ 'APP_DIRS': True,
+ 'OPTIONS': {
+ 'context_processors': [
+ 'django.contrib.auth.context_processors.auth',
+ 'django.template.context_processors.debug',
+ 'django.template.context_processors.request',
+ 'django.contrib.messages.context_processors.messages',
+ ],
+ },
+ },
+ ]
+
+ WSGI_APPLICATION = 'sosgui.wsgi.application'
+
+
+ # Database
+ # https://docs.djangoproject.com/en/1.8/ref/settings/#databases
+
+ DATABASES = {
+ 'default': {
+ 'ENGINE': 'django.db.backends.sqlite3',
+ 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+ }
+ }
+
+ LANGUAGE_CODE = 'en-us'
+
+ TIME_ZONE = 'UTC'
+
+ USE_I18N = True
+
+ USE_L10N = True
+
+ USE_TZ = True
+
+
+ # Static files (CSS, JavaScript, Images)
+ # https://docs.djangoproject.com/en/1.8/howto/static-files/
+
+ STATIC_URL = '/static/'
+
+ STATICFILES_DIRS = [
+ '/opt/ovis/ui/static/',
+ ]
+
+ SESSION_EXPIRE_AT_BROWSER_CLOSE = True
+ SOS_ROOT = "/store/"
+ DSOS_ROOT = "/store/"
+ DSOS_CONF = "/opt/ovis/etc/dsosd.conf"
+ LOG_FILE = "/var/log/sosgui/sosgui.log"
+ LOG_DATE_FMT = "%F %T"
+ ODS_LOG_FILE = "/var/log/sosgui/ods.log"
+ ODS_LOG_MASK = "255"
+ ODS_GC_TIMEOUT = 10
+ BSTORE_PLUGIN="bstore_sos"
+ os.environ.setdefault("BSTORE_PLUGIN_PATH", "/opt/ovis/lib64")
+ os.environ.setdefault("SET_POS_KEEP_TIME", "3600")
+
+
+ try:
+ import ldms_cfg
+ LDMS_CFG = ldms_cfg.aggregators
+ except Exception as e:
+ log.write(repr(e)+'\n')
+ LDMS_CFG = { "aggregators" : [] }
+
+ try:
+ import syslog
+ SYSLOG_CFG = syslog.syslog
+ except Exception as e:
+ log.write('SYSLOG_SETTINGS ERR '+repr(e)+'\n')
+ SYSLOG_CFG = { "stores" : [] }
+
+LDMS Sampler Container
+----------------------
+
+.. code:: sh
+
+ # SYNOPSIS
+ $ docker run -d --name= --network=host --pid=host --privileged
+ -e COMPID= # set COMPID environment variable
+ [ -v /run/munge:/run/munge:ro ] # expose host's munge to the container
+ [ -v /on-host/munge.key:/etc/munge/munge.key:ro ] # use container's munged with custom key
+ ovishpc/ldms-samp # the image name
+ -x : # transport, listening port
+ [ -a munge ] # use munge authentication
+ [ OTHER LDMSD OPTIONS ] # e.g. -v INFO
+
+``ovishpc/ldms-samp`` entrypoint executes ``ldmsd -F``, making it the
+leader process of the container. Users can append ``[OPTIONS]`` and they
+will be passed to ``ldmsd -F`` CLI. If ``-a munge`` is given, the
+entrypoint script will check if ``/run/munge`` is a bind-mount from the
+host. If so, munge encoding/decoding is done through ``munged`` on the
+host via the bind-mounted ``/run/munge`` -- no need to run ``munged``
+inside the container. Otherwise, in the case that ``-a munge`` is given
+and ``/run/munge`` is not host-bind-mounted, the entrypoint script runs
+``munged`` and tests it BEFORE ``ldmsd``.
+
+Usage examples:
+
+.. code:: sh
+
+ ## On a compute node
+
+ # Pull the container image
+ $ docker pull ovishpc/ldms-samp
+
+ # Start ldmsd container, with host network namespace and host PID namespace;
+ # - COMPID env var is HOSTNAME without the non-numeric prefixes and the leading
+ # zeroes (e.g. nid00100 => 100, nid10000 => 10000). Note that this uses
+ # bash(1) Parameter Expansion and Pattern Matching features.
+ #
+ # - serving on socket transport port 411 with munge authentication
+ #
+ # - using host munge
+ $ docker run -d --name=samp --network=host --pid=host --privileged \
+ -e COMPID=${HOSTNAME##*([^1-9])} \
+ -v /run/munge:/run/munge:ro \
+ ovishpc/ldms-samp -x sock:411 -a munge
+
+We encourage using ``maestro`` to configure a cluster of ``ldmsd``.
+However, if there is a need to configure ``ldmsd`` manually, one can do
+so from within the container. In this case:
+
+.. code:: sh
+
+ $ docker exec samp /bin/bash
+ (samp) $ ldmsd_controller --xprt sock --port 411 --host localhost --auth munge
+ LDMSD_CONTROLLER_PROMPT>
+
+LDMS Aggregator Container
+-------------------------
+
+.. code:: sh
+
+ # SYNOPSIS
+ $ docker run -d --name= --network=host --pid=host --privileged
+ -e COMPID= # set COMPID environment variable
+ [ -v /on-host/storage:/storage:rw ] # bind 'storage/'. Could be any path, depending on ldmsd configuration
+ [ -v /on-host/dsosd.json:/etc/dsosd.json:ro ] # bind dsosd.json configuration, if using dsosd to export SOS data
+ [ -v /run/munge:/run/munge:ro ] # expose host's munge to the container
+ [ -v /on-host/munge.key:/etc/munge/munge.key:ro ] # use container's munged with custom key
+ ovishpc/ldms-agg # the image name
+ -x : # transport, listening port
+ [ -a munge ] # use munge authentication
+ [ OTHER LDMSD OPTIONS ]
+ # dsosd to export SOS data
+ $ docker exec -it /bin/bash
+ () $ rpcbind
+ () $ export DSOSD_DIRECTORY=/etc/dsosd.json
+ () $ dsosd >/var/log/dsosd.log 2>&1 &
+ () $ exit
+
+``ovishpc/ldms-agg`` entrypoint executes ``ldmsd -F``, making it the
+leader process of the container. It also handles ``-a munge`` the same
+way that ``ovishpc/ldms-samp`` does. In the case of exporting SOS data
+through ``dsosd``, the daemon must be started after the container
+is up.
+
+Example usage:
+
+.. code:: sh
+
+ ## On a service node
+
+ # Pull the container image
+ $ docker pull ovishpc/ldms-agg
+
+ # Start ldmsd container, using host network namespace and host PID namespace;
+ # - with host munge
+ # - serving port 411
+ # - The `-v /on-host/storage:/storage:rw` option is to map on-host storage
+ # location `/on-host/storage` to `/storage` location in the container. The
+ # data written to `/storage/` in the container will persist in
+ # `/on-host/storage/` on the host.
+ $ docker run -d --name=agg --network=host --privileged \
+ -v /run/munge:/run/munge:ro \
+ -v /on-host/storage:/storage:rw \
+ ovishpc/ldms-agg -x sock:411 -a munge
+
+ # Start dsosd service for remote SOS container access (e.g. by UI), by first
+ # bring up a shell inside the container, then start rpcbind and dsosd.
+ $ docker exec agg /bin/bash
+ (agg) $ rpcbind
+ (agg) $ export DSOSD_DIRECTORY=/etc/dsosd.json
+ (agg) $ dsosd >/var/log/dsosd.log 2>&1 &
+ (agg) $ exit
+
+``dsosd.json`` contains a collection of ``container_name`` - ``path``
+mappings for each host. For example:
+
+.. code:: json
+
+ {
+ "host1": {
+ "dsos_cont":"/storage/cont_host1",
+ "tmp_cont":"/tmp/ram_cont"
+ },
+ "host2": {
+ "dsos_cont":"/storage/cont_host2",
+ "tmp_cont":"/tmp/ram_cont"
+ }
+ }
+
+Maestro Container
+-----------------
+
+.. code:: sh
+
+ # SYNOPSIS
+ $ docker run -d --name= --network=host --privileged
+ [ -v /run/munge:/run/munge:ro ] # expose host's munge to the container
+ [ -v /on-host/munge.key:/etc/munge/munge.key:ro ] # use container's munged with custom key
+ -v /on-host/ldms_cfg.yaml:/etc/ldms_cfg.yaml:ro # bind ldms_cfg.yaml, used by maestro_ctrl
+ ovishpc/ldms-maestro # the image name
+
+``ovishpc/ldms-maestro`` containers will run at least two daemons:
+``etcd`` and ``maestro``. It may also run ``munged`` if host's munge is
+not used (i.e. ``-v /run/munge:/run/munge:ro`` is not given to
+``docker run``). The entrypoint script does the following:
+
+#. starts ``etcd``
+#. starts ``munged`` if host's munge is not used.
+#. executes ``maestro_ctrl`` with ``--ldms_config /etc/ldms_cfg.yaml``.
+ Notice that the ``ldms_cfg.yaml`` file is given by the user by the
+ ``-v`` option.
+#. executes the ``maestro`` process. ``maestro`` will periodically connect to
+ all ``ldmsd`` specified by ``ldms_cfg.yaml`` and send the
+ corresponding configuration.
+
+REMARK: For now, the ``etcd`` and ``maestro`` processes in the
+``ovishpc/ldms-maestro`` container run as stand-alone processes. We will
+support a cluster of ``ovishpc/ldms-maestro`` containers in the future.
+
+Example usage:
+
+.. code:: sh
+
+ ## On a service node
+
+ # Pull the container image
+ $ docker pull ovishpc/ldms-maestro
+
+ # Start maestro container, using host network namespace, and using host's munge
+ $ docker run -d --network=host --privileged \
+ -v /run/munge:/run/munge:ro \
+ -v /my/ldms_cfg.yaml:/etc/ldms_cfg.yaml:rw \
+ ovishpc/ldms-maestro
+
+Please see `ldms_cfg.yaml `__ for
+an example.
+
+LDMS UI Back-End Container
+--------------------------
+
+.. code:: sh
+
+ # SYNOPSIS
+ $ docker run -d --name= --network=host --privileged
+ -v /on-host/dsosd.conf:/opt/ovis/etc/dsosd.conf # dsosd.conf file, required to connect to dsosd
+ -v /on-host/settings.py:/opt/ovis/ui/sosgui/settings.py # sosdb-ui Django setting file
+ ovishpc/ldms-ui # the image name
+ [ --http-socket=: ] # addr:port to serve, ":80" by default
+ [ OTHER uWSGI OPTIONS ]
+
+``ovishpc/ldms-ui`` executes a ``uwsgi`` process with the ``sosgui``
+(back-end GUI WSGI module) application module. It is the only process in
+the container. The ``uwsgi`` in this container listens on
+port 80 by default. The ``--http-socket=ADDR:PORT`` option will override this behavior.
+Other options given to ``docker run`` will also be passed to the
+``uwsgi`` command.
+
+The ``sosgui`` WSGI application requires two configuration files:
+
+#. ``dsosd.conf``: containing a list of hostnames of dsosd, one per
+ line. See `here `__ for an
+ example.
+#. ``settings.py``: containing the WSGI application settings. Please pay
+ attention to DSOS_ROOT and DSOS_CONF. See
+ `here `__ for an example.
+
+Usage example:
+
+.. code:: sh
+
+ ## On a service node
+
+ # Pull the container image
+ $ docker pull ovishpc/ldms-ui
+
+ # Start ldms-ui container, using host network namespace
+ $ docker run -d --name=ui --network=host --privileged \
+ -v /HOST/dsosd.conf:/opt/ovis/etc/dsosd.conf \
+ -v /HOST/settings.py:/opt/ovis/ui/sosgui/settings.py \
+ ovishpc/ldms-ui
+
+LDMS-Grafana Container
+----------------------
+
+.. code:: sh
+
+ # SYNOPSIS
+ $ docker run -d --name= --network=host --privileged
+ [ -v /on-host/grafana.ini:/etc/grafana/grafana.ini:ro ] # custom grafana config
+ [ -e GF_SERVER_HTTP_ADDR= ] # env var to override Grafana IP address binding (default: all addresses)
+ [ -e GF_SERVER_HTTP_PORT= ] # env var to override Grafana port binding (default: 3000)
+ ovishpc/ldms-grafana # the image name
+ [ OTHER GRAFANA-SERVER OPTIONS ] # other options to grafana-server
+
+``ovishpc/ldms-grafana`` is based on
+`grafana/grafana-oss:9.1.0-ubuntu `__
+with the SOS data source plugin to access distributed-SOS data. The grafana
+server listens to port 3000 by default. The options specified at the
+``docker run`` CLI will be passed to the ``grafana-server`` command.
+
+.. code:: sh
+
+ ## On a service node
+
+ # Pull the container image
+ $ docker pull ovishpc/ldms-grafana
+
+ # Start ldms-grafana container, this will use port 3000
+ $ docker run -d --name=grafana --privileged --network=host ovishpc/ldms-grafana
+
+ # Use a web browser to navigate to http://HOSTNAME:3000 to access grafana
+
+SSH port forwarding to grafana
+------------------------------
+
+In the case that the grafana server cannot be accessed directly, use SSH
+port forwarding as follows:
+
+.. code:: sh
+
+ (laptop) $ ssh -L 127.0.0.1:3000:127.0.0.1:3000 LOGIN_NODE
+ (LOGIN_NODE) $ ssh -L 127.0.0.1:3000:127.0.0.1:3000 G_HOST
+ # Assuming that the ldms-grafana container is running on G_HOST.
+
+Then, you should be able to access the grafana web server via
+``http://127.0.0.1:3000/`` on your laptop.
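+
+With an OpenSSH client that supports jump hosts (OpenSSH 7.3 or newer),
+the two hops can be combined into a single command:
+
+.. code:: sh
+
+ (laptop) $ ssh -J LOGIN_NODE -L 127.0.0.1:3000:127.0.0.1:3000 G_HOST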
+
+Building Containers
+-------------------
+
+TL;DR: edit `config.sh `__, customize the ``*_REPO``,
+``*_BRANCH`` and ``*_OPTIONS``, then run ``./scripts/build-all.sh``.
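+
+For example, to build from a different repository or branch, the relevant
+variables in ``config.sh`` might be edited as follows (the URL and branch
+name are placeholders):
+
+.. code:: sh
+
+ OVIS_REPO=https://github.com/YOUR-FORK/ovis.git
+ OVIS_BRANCH=my-feature-branch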
+
+The following steps describe the building process executed by the
+`scripts/build-all.sh `__ script:
+
+#. Build ``ovishpc/ldms-dev`` docker image. This "development" image
+ contains development programs and libraries for building
+ ``/opt/ovis`` binaries and ``dsosds``.
+
+ - See
+ `recipes/ldms-dev/docker-build.sh `__
+ and `recipes/ldms-dev/Dockerfile `__.
+
+#. Build ``/opt/ovis`` binaries with
+ `scripts/build-ovis-binaries.sh `__
+ script. The environment variables specified in the
+ `config.sh `__ file tell the build script which
+ repositories and branches to check out and build. The variables,
+ categorized by component, are as follows:
+
+ - ovis: the main component of OVIS project (``ldmsd`` and LDMS
+ python)
+
+ - ``OVIS_REPO``
+ - ``OVIS_BRANCH``
+
+ - sos: the Scalable Object Storage technology
+
+ - ``SOS_REPO``
+ - ``SOS_BRANCH``
+
+ - maestro: the ``ldmsd`` cluster configurator
+
+ - ``MAESTRO_REPO``
+ - ``MAESTRO_BRANCH``
+
+ - numsos:
+
+ - ``NUMSOS_REPO``
+ - ``NUMSOS_BRANCH``
+
+ - sosdb-ui:
+
+ - ``SOSDBUI_REPO``
+ - ``SOSDBUI_BRANCH``
+
+ - sosdb-grafana:
+
+ - ``SOSDBGRAFANA_REPO``
+ - ``SOSDBGRAFANA_BRANCH``
+
+ The binaries output directory (absolute, or relative to the top
+ source directory) is specified by the ``OVIS`` variable in
+ `config.sh `__.
+
+#. Build the ``dsosds`` grafana data source plugin for SOS data access with
+ `scripts/build-dsosds.sh `__. The following
+ environment variables in `config.sh `__ determine which
+ repository and branch to check out for building ``dsosds``:
+
+ - ``DSOSDS_REPO``
+ - ``DSOSDS_BRANCH``
+
+ The ``dsosds`` output directory (absolute, or relative to the top
+ source directory) is specified by the ``DSOSDS`` variable in
+ `config.sh `__.
+
+#. Build ``ovishpc/ldms-samp`` image using the ``ovis`` binaries built
+ in step 2. The LDMS Sampler Image contains only ``ldmsd``, the
+ sampler plugins and their dependencies. The storage plugins are not
+ included.
+
+ - See
+ `recipes/ldms-samp/docker-build.sh `__
+ and
+ `recipes/ldms-samp/Dockerfile `__.
+ - Also see ``OVIS_OPTIONS`` in `config.sh `__ for the
+ build options that enable/disable plugins.
+
+#. Build ``ovishpc/ldms-agg`` image using the ``ovis`` binaries built in
+ step 2. The LDMS Aggregator Image contains SOS, ``ldmsd`` and all
+ plugins (both samplers and stores).
+
+ - See
+ `recipes/ldms-agg/docker-build.sh `__
+ and `recipes/ldms-agg/Dockerfile `__.
+ - Also see ``OVIS_OPTIONS`` in `config.sh `__ for the
+ build options that enable/disable plugins.
+
+#. Build ``ovishpc/ldms-maestro`` image using the maestro binaries from
+ ``ovis`` binaries built in step 2. This image also includes ``etcd``,
+ a dependency of ``maestro``.
+
+ - See
+ `recipes/ldms-maestro/docker-build.sh `__
+ and
+ `recipes/ldms-maestro/Dockerfile `__.
+
+#. Build ``ovishpc/ldms-ui`` image using the UI components from ``ovis``
+ binaries built in step 2 (``ovis/ui/``). The image includes ``uwsgi``
+ web server that is used to serve ``sosdb-ui`` Django application,
+ providing SOS data access over HTTP.
+
+ - See
+ `recipes/ldms-ui/docker-build.sh `__
+ and `recipes/ldms-ui/Dockerfile `__.
+
+#. Build ``ovishpc/ldms-grafana`` image based on the ``grafana`` image and
+ include the ``dsosds`` grafana data source plugin built in step 3. A
+ container instantiated from this image is basically a grafana
+ server with the ``dsosds`` data source plugin pre-installed.
+
+ - See
+ `recipes/ldms-grafana/docker-build.sh `__
+ and
+ `recipes/ldms-grafana/Dockerfile `__.
+
+Note that many of the ``docker-build.sh`` scripts use ``tar`` to create the
+docker build context (the set of files / directories for the Docker build
+process to ADD) instead of using the working directory that contains the
+``Dockerfile``. This avoids having to copy the selected files
+from ``ovis`` into each of the ``Dockerfile`` directories.
+
+It is also possible to manually run an ``ovishpc/ldms-dev`` container,
+build your own version of ``ovis`` (e.g. creating a new plugin), and
+package a custom ``ovishpc/ldms-samp`` with
+``recipes/ldms-samp/docker-build.sh``, because the
+``docker-build.sh`` script uses whatever binaries are available in the
+``ovis`` directory.
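+
+A minimal sketch of that workflow (paths and configure options here are
+assumptions; adapt them to your checkout and to the ``OVIS`` output
+directory configured in ``config.sh``):
+
+.. code:: sh
+
+ # build a custom ovis tree inside the dev container
+ $ docker run -it --rm -v $PWD:/work ovishpc/ldms-dev bash
+ (container) $ cd /work/ovis
+ (container) $ ./autogen.sh
+ (container) $ ./configure --prefix=/work/ovis-install && make && make install
+ (container) $ exit
+
+ # then package a sampler image from the resulting binaries
+ $ ./recipes/ldms-samp/docker-build.sh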
diff --git a/rtd/docs/source/contributing/docreqs.rst b/rtd/docs/source/contributing/docreqs.rst
new file mode 100644
index 000000000..444c3a2f0
--- /dev/null
+++ b/rtd/docs/source/contributing/docreqs.rst
@@ -0,0 +1,2 @@
+Documentation Requirements for Contributions
+=============================================
diff --git a/rtd/docs/source/contributing/index.rst b/rtd/docs/source/contributing/index.rst
new file mode 100644
index 000000000..77499458d
--- /dev/null
+++ b/rtd/docs/source/contributing/index.rst
@@ -0,0 +1,10 @@
+Contributing to LDMS
+====================
+
+.. toctree::
+ :maxdepth: 2
+
+ samplerwrite
+ storewrite
+ docreqs
+
diff --git a/rtd/docs/source/contributing/samplerwrite.rst b/rtd/docs/source/contributing/samplerwrite.rst
new file mode 100644
index 000000000..397a34d97
--- /dev/null
+++ b/rtd/docs/source/contributing/samplerwrite.rst
@@ -0,0 +1,2 @@
+How to write an LDMS Sampler Plugin
+====================================
diff --git a/rtd/docs/source/contributing/storewrite.rst b/rtd/docs/source/contributing/storewrite.rst
new file mode 100644
index 000000000..b95a2fe46
--- /dev/null
+++ b/rtd/docs/source/contributing/storewrite.rst
@@ -0,0 +1,2 @@
+How to write an LDMS Store Plugin
+====================================
diff --git a/rtd/docs/source/deployment/index.rst b/rtd/docs/source/deployment/index.rst
new file mode 100644
index 000000000..54dee431b
--- /dev/null
+++ b/rtd/docs/source/deployment/index.rst
@@ -0,0 +1,9 @@
+LDMS Deployment
+===============
+This section covers how to deploy and test LDMS
+
+.. toctree::
+ :maxdepth: 2
+
+ ldms-test
+ ldms-jenkins
diff --git a/rtd/docs/source/deployment/ldms-jenkins.rst b/rtd/docs/source/deployment/ldms-jenkins.rst
new file mode 100644
index 000000000..2c3f77fce
--- /dev/null
+++ b/rtd/docs/source/deployment/ldms-jenkins.rst
@@ -0,0 +1,2 @@
+LDMS Build, Install and RPM Testing on Jenkins
+-----------------------------------------------
diff --git a/rtd/docs/source/deployment/ldms-test.rst b/rtd/docs/source/deployment/ldms-test.rst
new file mode 100644
index 000000000..88966742f
--- /dev/null
+++ b/rtd/docs/source/deployment/ldms-test.rst
@@ -0,0 +1,3 @@
+Github Repository for LDMS Functional Testing
+----------------------------------------------
+
diff --git a/rtd/docs/source/images/appsysfusion.png b/rtd/docs/source/images/appsysfusion.png
new file mode 100644
index 000000000..28769ee14
Binary files /dev/null and b/rtd/docs/source/images/appsysfusion.png differ
diff --git a/rtd/docs/source/images/darshanConnector.png b/rtd/docs/source/images/darshanConnector.png
new file mode 100644
index 000000000..173411a3b
Binary files /dev/null and b/rtd/docs/source/images/darshanConnector.png differ
diff --git a/rtd/docs/source/images/grafana/grafana_output.png b/rtd/docs/source/images/grafana/grafana_output.png
new file mode 100644
index 000000000..293eec6e7
Binary files /dev/null and b/rtd/docs/source/images/grafana/grafana_output.png differ
diff --git a/rtd/docs/source/images/grafana/grafana_query.png b/rtd/docs/source/images/grafana/grafana_query.png
new file mode 100644
index 000000000..96acc8693
Binary files /dev/null and b/rtd/docs/source/images/grafana/grafana_query.png differ
diff --git a/rtd/docs/source/images/grafana/grafana_time.png b/rtd/docs/source/images/grafana/grafana_time.png
new file mode 100644
index 000000000..3e15059f0
Binary files /dev/null and b/rtd/docs/source/images/grafana/grafana_time.png differ
diff --git a/rtd/docs/source/images/grafana/grafana_timerange.png b/rtd/docs/source/images/grafana/grafana_timerange.png
new file mode 100644
index 000000000..73af4aff7
Binary files /dev/null and b/rtd/docs/source/images/grafana/grafana_timerange.png differ
diff --git a/rtd/docs/source/images/grafana/grafanapanel.png b/rtd/docs/source/images/grafana/grafanapanel.png
new file mode 100644
index 000000000..6e2133537
Binary files /dev/null and b/rtd/docs/source/images/grafana/grafanapanel.png differ
diff --git a/rtd/docs/source/images/grafana/grafanapanel_variables.png b/rtd/docs/source/images/grafana/grafanapanel_variables.png
new file mode 100644
index 000000000..cc978d890
Binary files /dev/null and b/rtd/docs/source/images/grafana/grafanapanel_variables.png differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2020tutorial.png b/rtd/docs/source/images/ldmscon/ldmscon2020tutorial.png
new file mode 100644
index 000000000..29807db12
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2020tutorial.png differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2021pres.PNG b/rtd/docs/source/images/ldmscon/ldmscon2021pres.PNG
new file mode 100644
index 000000000..6643f2a7d
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2021pres.PNG differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2021tutorial.PNG b/rtd/docs/source/images/ldmscon/ldmscon2021tutorial.PNG
new file mode 100644
index 000000000..3e8631434
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2021tutorial.PNG differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2022pres.PNG b/rtd/docs/source/images/ldmscon/ldmscon2022pres.PNG
new file mode 100644
index 000000000..ace4b1891
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2022pres.PNG differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2022tutorial.PNG b/rtd/docs/source/images/ldmscon/ldmscon2022tutorial.PNG
new file mode 100644
index 000000000..695707372
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2022tutorial.PNG differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2023pres.PNG b/rtd/docs/source/images/ldmscon/ldmscon2023pres.PNG
new file mode 100644
index 000000000..12bd57e8e
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2023pres.PNG differ
diff --git a/rtd/docs/source/images/ldmscon/ldmscon2023tutorial.png b/rtd/docs/source/images/ldmscon/ldmscon2023tutorial.png
new file mode 100644
index 000000000..a19458edf
Binary files /dev/null and b/rtd/docs/source/images/ldmscon/ldmscon2023tutorial.png differ
diff --git a/rtd/docs/source/images/ovis-hpc_homepage.png b/rtd/docs/source/images/ovis-hpc_homepage.png
new file mode 100644
index 000000000..a50e74bc6
Binary files /dev/null and b/rtd/docs/source/images/ovis-hpc_homepage.png differ
diff --git a/rtd/docs/source/index.rst b/rtd/docs/source/index.rst
new file mode 100644
index 000000000..1c5b2b950
--- /dev/null
+++ b/rtd/docs/source/index.rst
@@ -0,0 +1,67 @@
+.. Copyright 2023 Sandia National Laboratories, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+
+ SPDX-License-Identifier: (LGPL-3.0)
+
+.. Flux documentation master file, created by
+ sphinx-quickstart on Fri Jan 10 15:11:07 2020.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome To OVIS-HPC Documentation!
+====================================
+.. image:: https://github.com/ovis-hpc/readthedocs/blob/main/docs/source/images/ovis-logo.png?raw=true
+ :width: 225
+ :height: 250
+ :align: center
+
+**OVIS** is a modular system for HPC data collection, transport, storage, analysis, visualization, and log message exploration. The Lightweight Distributed Metric Service (**LDMS**) is a scalable, low-overhead, low-latency framework for the collection, movement, and storage of metric/event data on distributed computer systems.
+
+.. toctree::
+ :maxdepth: 2
+ :caption: OVIS and Group Activity
+
+ About Ovis
+ LDMS Users Group Conference (LDMSCON)
+ LDMS Users Group
+ OVIS Publications
+
+.. toctree::
+ :maxdepth: 4
+ :caption: OVIS Components
+
+ ldms-index
+ SOS
+ Maestro
+ Baler
+ ASF
+
+.. toctree::
+ :maxdepth: 6
+ :caption: Deployment
+
+ LDMS
+ SOS
+ Maestro
+ Baler
+ ASF
+
+
+Other Projects
+====================================
+
+`ldms `_
+`ovis-publications `_
+`maestro `_
+`sos `_
+`baler `_
+
diff --git a/rtd/docs/source/ldms-index.rst b/rtd/docs/source/ldms-index.rst
new file mode 100644
index 000000000..0c5f28033
--- /dev/null
+++ b/rtd/docs/source/ldms-index.rst
@@ -0,0 +1,46 @@
+LDMS
+======
+
+.. image:: images/ovis-hpc_homepage.png
+ :width: 1000
+ :height: 150
+
+LDMS GitHub: https://github.com/ovis-hpc/ovis
+
+To join the LDMS Users Group Mailing List: https://github.com/ovis-hpc/ovis-wiki/wiki/Mailing-Lists
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Introduction To LDMS
+
+ ldms-quickstart
+ ldms-tutorial
+ ldms-streams
+ container-quickstart
+
+.. toctree::
+ :maxdepth: 2
+ :caption: LDMS Man Pages
+
+ ldms_man/index
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Sampler Plugin Man Pages
+
+ sampler_man/index
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Store Plugin Man Pages
+
+ store_man/index
+
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contributing to LDMS
+
+ contributing/index
+
+
diff --git a/rtd/docs/source/ldms-quickstart.rst b/rtd/docs/source/ldms-quickstart.rst
new file mode 100644
index 000000000..1670aa793
--- /dev/null
+++ b/rtd/docs/source/ldms-quickstart.rst
@@ -0,0 +1,623 @@
+LDMS Quick Start
+###########################
+
+Installation
+*****************
+
+AlmaLinux8
+------------
+
+Prerequisites
+=============
+* AlmaLinux8 (AlmaLinux is binary compatible with RHEL®)
+* openssl-devel
+* gnu compiler
+* swig
+* autoconf
+* libtool
+* readline
+* readline-devel
+* libevent
+* libevent-devel
+* autogen-libopts
+* gettext
+* python3.8
+* python38-Cython
+* python38-libs
+* glib2-devel
+* git
+* bison
+* make
+* byacc
+* flex
+
+Prerequisite Installation
+---------------------------
+The following steps were run on AlmaLinux8 arm64v8.
+
+.. code-block:: RST
+
+ sudo dnf update -y
+ sudo dnf install -y openssl
+ sudo dnf install -y openssl-devel
+ sudo dnf install -y swig
+ sudo dnf install -y libtool
+ sudo dnf install -y readline
+ sudo dnf install -y readline-devel
+ sudo dnf install -y libevent
+ sudo dnf install -y libevent-devel
+ sudo dnf install -y autogen-libopts
+ sudo dnf install -y gettext
+ sudo dnf install -y glib2
+ sudo dnf install -y glib2-devel
+ sudo dnf install -y git
+ sudo dnf install -y bison
+ sudo dnf install -y make
+ sudo dnf install -y byacc
+ sudo dnf install -y flex
+ sudo dnf install -y python38
+ sudo dnf install -y python38-devel
+ sudo dnf install -y python38-Cython
+ sudo dnf install -y python38-libs
+
+
+RHEL 9
+------------
+
+Prerequisites
+=============
+* RHEL 9
+* openssl-devel
+* pkg-config
+* automake
+* libtool
+* python3 (or higher)
+* python3-devel (or higher)
+* cython
+* bison
+* flex
+
+Prerequisite Installation
+---------------------------
+The following steps were run on a basic RHEL 9 instance via AWS.
+
+.. code-block:: RST
+
+ sudo yum update -y
+ sudo yum install automake -y
+ sudo yum install openssl-devel -y
+ sudo yum install pkg-config -y
+ sudo yum install libtool -y
+ sudo yum install python3 -y
+ sudo yum install python3-devel.x86_64 -y
+ sudo yum install python3-Cython -y
+ sudo yum install make -y
+ sudo yum install bison -y
+ sudo yum install flex -y
+
+
+LDMS Source Installation Instructions
+-------------------------------------
+
+Getting the Source
+==================
+* This example shows cloning into $HOME/Source/ovis-4 and installing into $HOME/ovis/4.4.2
+
+.. code-block:: RST
+
+ mkdir $HOME/Source
+ mkdir $HOME/ovis
+ cd $HOME/Source
+ git clone -b OVIS-4.4.2 https://github.com/ovis-hpc/ovis.git ovis-4
+
+Building the Source
+-----------------------
+
+* Run autogen.sh
+
+.. code-block:: RST
+
+ cd $HOME/Source/ovis-4
+ ./autogen.sh
+
+* Configure and Build (builds the default Linux samplers; the installation directory is given by --prefix):
+
+.. code-block:: RST
+
+ mkdir build
+ cd build
+ ../configure --prefix=$HOME/ovis/4.4.2
+ make
+ make install
+
+Basic Configuration and Running
+*******************************
+* Set up environment:
+
+.. code-block:: RST
+
+ OVIS=$HOME/ovis/4.4.2
+ export LD_LIBRARY_PATH=$OVIS/lib:$LD_LIBRARY_PATH
+ export LDMSD_PLUGIN_LIBPATH=$OVIS/lib/ovis-ldms
+ export ZAP_LIBPATH=$OVIS/lib/ovis-ldms
+ export PATH=$OVIS/sbin:$OVIS/bin:$PATH
+ export PYTHONPATH=$OVIS/lib/python3.8/site-packages
+
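+To sanity-check the environment, verify that the binaries resolve from the
+install tree (``-V`` prints the version and exits in recent OVIS-4
+releases):
+
+.. code-block:: RST
+
+ which ldmsd
+ ldmsd -V
+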
+Sampler
+***********************
+* Create a new configuration file, named ``sampler.conf``, that loads the ``meminfo`` and ``vmstat`` samplers. For this example it can be saved anywhere; it will be used later to start the LDMS daemon (``ldmsd``).
+
+The following configuration uses generic values for hostname, uid, gid, component id, and the octal permission set.
+
+Sampling intervals are set in microseconds (i.e., 1 sec = 1e+6 µs) and are adjustable as needed.
+Some suggestions include:
+
+.. list-table:: LDMS Sampler Plugin Interval Settings
+ :widths: 25 25 25
+ :header-rows: 1
+
+ * - Sampler
+ - Seconds (sec)
+ - Microseconds (µs)
+ * - Power
+ - 0.1 sec
+ - 100000 µs
+ * - Meminfo
+ - 1.0 sec
+ - 1000000 µs
+ * - VMstat
+ - 10 sec
+ - 10000000 µs
+ * - Link Status
+ - 60 sec
+ - 60000000 µs
+
+
+.. note::
+ Sampling offset is typically set to 0 for sampler plugins.
+
+
+.. code-block:: RST
+ :linenos:
+
+ # Meminfo Sampler Plugin using 1 second sampling interval
+ load name=meminfo
+ config name=meminfo producer=host1 instance=host1/meminfo component_id=1 schema=meminfo job_set=host1/jobinfo uid=12345 gid=12345 perm=0755
+ start name=meminfo interval=1000000 offset=0
+ # VMStat Sampler Plugin using 10 second sampling interval
+ load name=vmstat
+ config name=vmstat producer=host1 instance=host1/vmstat component_id=1 schema=vmstat job_set=host1/jobinfo uid=0 gid=0 perm=0755
+ start name=vmstat interval=10000000 offset=0
+
+As an alternative to the configuration above, one may instead export environment variables to set LDMS's runtime configuration, referencing those values in the sampler configuration file.
+
+The following setup will set the samplers to collect at 1-second (i.e., 1000000 µs) intervals:
+
+.. code-block:: RST
+
+ export HOSTNAME=${HOSTNAME:=$(hostname -s)} #Typically already is set, set if not
+ export COMPONENT_ID=1
+ export SAMPLE_INTERVAL=1000000
+ export SAMPLE_OFFSET=50000
+
+.. code-block:: RST
+ :linenos:
+
+ # Meminfo Sampler Plugin using environment variables HOSTNAME, COMPONENT_ID, SAMPLE_INTERVAL, and SAMPLE_OFFSET
+ load name=meminfo
+ config name=meminfo producer=${HOSTNAME} instance=${HOSTNAME}/meminfo component_id=${COMPONENT_ID} schema=meminfo job_set=${HOSTNAME}/jobinfo uid=12345 gid=12345 perm=0755
+ start name=meminfo interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}
+ # VMStat Sampler Plugin using environment variables HOSTNAME, COMPONENT_ID, SAMPLE_INTERVAL, and SAMPLE_OFFSET
+ load name=vmstat
+ config name=vmstat producer=${HOSTNAME} instance=${HOSTNAME}/vmstat component_id=${COMPONENT_ID} schema=vmstat job_set=${HOSTNAME}/jobinfo uid=0 gid=0 perm=0755
+ start name=vmstat interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}
+
+* Run a daemon using munge authentication:
+
+.. code-block:: RST
+
+ ldmsd -x sock:10444 -c sampler.conf -l /tmp/demo_ldmsd.log -v DEBUG -a munge -r $(pwd)/ldmsd.pid
+
+Or in non-cluster environments where munge is unavailable:
+
+.. code-block:: RST
+
+ ldmsd -x sock:10444 -c sampler.conf -l /tmp/demo_ldmsd.log -v DEBUG -r $(pwd)/ldmsd.pid
+
+.. note::
+ For the rest of these instructions, omit the "-a munge" if you do not have munge running. This will also write out DEBUG-level information to the specified (-l) log.
+
+* Run ldms_ls on that node to see set, meta-data, and contents:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10444 -a munge
+ ldms_ls -h localhost -x sock -p 10444 -v -a munge
+ ldms_ls -h localhost -x sock -p 10444 -l -a munge
+
+.. note::
+ Note the use of munge. Users will not be able to query a daemon launched with munge if not querying with munge. Users will only be able to see sets as allowed by the permissions in response to `ldms_ls`.
+
+Example (note permissions and update hint):
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10444 -l -v -a munge
+
+Output:
+
+.. code-block:: RST
+
+ host1/vmstat: consistent, last update: Mon Oct 22 16:58:15 2018 -0600 [1385us]
+ APPLICATION SET INFORMATION ------
+ updt_hint_us : 5000000:0
+ METADATA --------
+ Producer Name : host1
+ Instance Name : host1/vmstat
+ Schema Name : vmstat
+ Size : 5008
+ Metric Count : 110
+ GN : 2
+ User : root(0)
+ Group : root(0)
+ Permissions : -rwxr-xr-x
+ DATA ------------
+ Timestamp : Mon Oct 22 16:58:15 2018 -0600 [1385us]
+ Duration : [0.000106s]
+ Consistent : TRUE
+ Size : 928
+ GN : 110
+ -----------------
+ M u64 component_id 1
+ D u64 job_id 0
+ D u64 app_id 0
+ D u64 nr_free_pages 32522123
+ ...
+ D u64 pglazyfree 1082699829
+ host1/meminfo: consistent, last update: Mon Oct 22 16:58:15 2018 -0600 [1278us]
+ APPLICATION SET INFORMATION ------
+ updt_hint_us : 5000000:0
+ METADATA --------
+ Producer Name : host1
+ Instance Name : host1/meminfo
+ Schema Name : meminfo
+ Size : 1952
+ Metric Count : 46
+ GN : 2
+ User : myuser(12345)
+ Group : myuser(12345)
+ Permissions : -rwx------
+ DATA ------------
+ Timestamp : Mon Oct 22 16:58:15 2018 -0600 [1278us]
+ Duration : [0.000032s]
+ Consistent : TRUE
+ Size : 416
+ GN : 46
+ -----------------
+ M u64 component_id 1
+ D u64 job_id 0
+ D u64 app_id 0
+ D u64 MemTotal 131899616
+ D u64 MemFree 130088492
+ D u64 MemAvailable 129556912
+ ...
+ D u64 DirectMap1G 134217728
+
+
+Aggregator Using Data Pull
+**************************
+* Start another sampler daemon with a similar configuration on host2 using component_id=2, as above.
+* Make a configuration file (called agg11.conf) to aggregate from the two samplers at different intervals with the following contents:
+
+.. code-block:: RST
+ :linenos:
+
+ prdcr_add name=host1 host=host1 type=active xprt=sock port=10444 interval=20000000
+ prdcr_start name=host1
+ updtr_add name=policy_h1 interval=1000000 offset=100000
+ updtr_prdcr_add name=policy_h1 regex=host1
+ updtr_start name=policy_h1
+ prdcr_add name=host2 host=host2 type=active xprt=sock port=10444 interval=20000000
+ prdcr_start name=host2
+ updtr_add name=policy_h2 interval=2000000 offset=100000
+ updtr_prdcr_add name=policy_h2 regex=host2
+ updtr_start name=policy_h2
+
+* On host3, set up the environment as above and run a daemon:
+
+.. code-block:: RST
+
+ ldmsd -x sock:10445 -c agg11.conf -l /tmp/demo_ldmsd.log -v ERROR -a munge
+
+
+* Run `ldms_ls` on the aggregator node to see set listing:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10445 -a munge
+
+Output:
+
+.. code-block:: RST
+
+ host1/meminfo
+ host1/vmstat
+ host2/meminfo
+ host2/vmstat
+
+You can also run `ldms_ls` to query the ldms daemon on the remote node:
+
+.. code-block:: RST
+
+ ldms_ls -h host1 -x sock -p 10444 -a munge
+
+Output:
+
+.. code-block:: RST
+
+ host1/meminfo
+ host1/vmstat
+
+
+.. note::
+ `ldms_ls -l` shows the detailed output, including timestamps. This can be used to verify that the aggregator is aggregating the two hosts' sets at different intervals.
+
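+For example, compare the two hosts' update timestamps (output format as in
+the earlier ``ldms_ls -l`` examples):
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10445 -l -a munge | grep "last update"
+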
+Aggregator Using Data Push
+**************************
+* Use same sampler configurations as above.
+* Make a configuration file (called agg11_push.conf) to cause the two samplers to push their data to the aggregator as they update.
+
+ * Note that the prdcr configs remain the same as above, but the ``updtr_add`` includes the additional options: push=onchange auto_interval=false.
+
+ * Note that the updtr_add interval has no effect in this case but is currently required due to syntax checking
+
+.. code-block:: RST
+
+ prdcr_add name=host1 host=host1 type=active xprt=sock port=10444 interval=20000000
+ prdcr_start name=host1
+ prdcr_add name=host2 host=host2 type=active xprt=sock port=10444 interval=20000000
+ prdcr_start name=host2
+ updtr_add name=policy_all interval=5000000 push=onchange auto_interval=false
+ updtr_prdcr_add name=policy_all regex=.*
+ updtr_start name=policy_all
+
+
+* On host3, set up the environment as above and run a daemon:
+
+.. code-block:: RST
+
+ ldmsd -x sock:10445 -c agg11_push.conf -l /tmp/demo_ldmsd_log -v DEBUG -a munge
+
+* Run ldms_ls on the aggregator node to see set listing:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10445 -a munge
+
+Output:
+
+.. code-block:: RST
+
+ host1/meminfo
+ host1/vmstat
+ host2/meminfo
+ host2/vmstat
+
+
+Two Aggregators Configured as Failover Pairs
+********************************************
+* Use same sampler configurations as above
+* Make a configuration file (called agg11.conf) to aggregate from one sampler with the following contents:
+
+.. code-block:: RST
+
+ prdcr_add name=host1 host=host1 type=active xprt=sock port=10444 interval=20000000
+ prdcr_start name=host1
+ updtr_add name=policy_all interval=1000000 offset=100000
+ updtr_prdcr_add name=policy_all regex=.*
+ updtr_start name=policy_all
+ failover_config host=host3 port=10446 xprt=sock type=active interval=1000000 peer_name=agg12 timeout_factor=2
+ failover_start
+
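+The second daemon's configuration (``agg12.conf``) is not shown above; a
+plausible mirror-image sketch would aggregate from host2 and point its
+failover peer back at ``agg11`` on port 10445:
+
+.. code-block:: RST
+
+ prdcr_add name=host2 host=host2 type=active xprt=sock port=10444 interval=20000000
+ prdcr_start name=host2
+ updtr_add name=policy_all interval=1000000 offset=100000
+ updtr_prdcr_add name=policy_all regex=.*
+ updtr_start name=policy_all
+ failover_config host=host3 port=10445 xprt=sock type=active interval=1000000 peer_name=agg11 timeout_factor=2
+ failover_start
+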
+* On host3, set up the environment as above and run two daemons as follows:
+
+.. code-block:: RST
+
+ ldmsd -x sock:10445 -c agg11.conf -l /tmp/demo_ldmsd_log -v ERROR -n agg11 -a munge
+ ldmsd -x sock:10446 -c agg12.conf -l /tmp/demo_ldmsd_log -v ERROR -n agg12 -a munge
+
+* Run ldms_ls on each aggregator node to see set listing:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10445 -a munge
+ host1/meminfo
+ host1/vmstat
+ ldms_ls -h localhost -x sock -p 10446 -a munge
+ host2/meminfo
+ host2/vmstat
+
+* Kill one daemon:
+
+.. code-block:: RST
+
+ kill -SIGTERM <PID>
+
+* Make sure it died
+* Run ldms_ls on the remaining aggregator to see set listing:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10446 -a munge
+
+Output:
+
+.. code-block:: RST
+
+ host1/meminfo
+ host1/vmstat
+ host2/meminfo
+ host2/vmstat
+
+Set Groups
+***********************
+A set group is an LDMS set with special information that represents a group of sets inside ``ldmsd``. A set group appears as a regular LDMS set to other LDMS applications, but ``ldmsd`` and ``ldms_ls`` treat it as a collection of LDMS sets. If an ldmsd updater updates a set group, it also subsequently updates all the member sets. Performing ``ldms_ls -l`` on a set group likewise performs a long query on all the sets in the group.
+
+To illustrate how a set group works, the following subsections configure 2 sampler daemons with set groups and 1 aggregator daemon that updates and stores the groups.
+
+Creating a set group and inserting sets into it
+***********************************************
+The following is a configuration file for our s0 LDMS daemon (sampler #0) that collects sda disk stats in the s0/sda set and lo network usage in the s0/lo set. The s0/grp set group is created to contain both s0/sda and s0/lo.
+
+.. code-block:: RST
+
+ ### s0.conf
+ load name=procdiskstats
+ config name=procdiskstats device=sda producer=s0 instance=s0/sda
+ start name=procdiskstats interval=1000000 offset=0
+
+ load name=procnetdev
+ config name=procnetdev ifaces=lo producer=s0 instance=s0/lo
+ start name=procnetdev interval=1000000 offset=0
+
+ setgroup_add name=s0/grp producer=s0 interval=1000000 offset=0
+ setgroup_ins name=s0/grp instance=s0/sda,s0/lo
+
+The following is the same for s1 sampler daemon, but with different devices (sdb and eno1).
+
+.. code-block:: RST
+
+ ### s1.conf
+ load name=procdiskstats
+ config name=procdiskstats device=sdb producer=s1 instance=s1/sdb
+ start name=procdiskstats interval=1000000 offset=0
+
+ load name=procnetdev
+ config name=procnetdev ifaces=eno1 producer=s1 instance=s1/eno1
+ start name=procnetdev interval=1000000 offset=0
+
+ setgroup_add name=s1/grp producer=s1 interval=1000000 offset=0
+ setgroup_ins name=s1/grp instance=s1/sdb,s1/eno1
+
+The s0 LDMS daemon is listening on port 10000 and the s1 LDMS daemon is listening on port 10001.
+
+Perform `ldms_ls` on a group
+****************************
+Performing ``ldms_ls -v`` or ``ldms_ls -l`` on an LDMS daemon hosting a group will perform the query on the set representing the group itself, as well as iteratively querying the group's members.
+
+Example:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10000
+
+To long-query only the group and its members:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10000 -v s0/grp | grep consistent
+
+Output:
+
+.. code-block:: RST
+
+ s0/grp: consistent, last update: Mon May 20 15:44:30 2019 -0500 [511879us]
+ s0/lo: consistent, last update: Mon May 20 16:13:16 2019 -0500 [1126us]
+ s0/sda: consistent, last update: Mon May 20 16:13:17 2019 -0500 [1176us]
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 10000 -v s0/lo | grep consistent # query only the lo set from set group s0
+
+.. note::
+ The update time of the group set is the time that the last set was inserted into the group.
+
+Update / store with set group
+*****************************
+The following is an example of an aggregator configuration that match-updates only the set groups (and hence their members) and stores them with storage policies:
+
+.. code-block:: RST
+
+ # Stores
+ load name=store_csv
+ config name=store_csv path=csv
+ # strgp for netdev, csv file: "./csv/net/procnetdev"
+ strgp_add name=store_net plugin=store_csv container=net schema=procnetdev
+ strgp_prdcr_add name=store_net regex=.*
+ strgp_start name=store_net
+ # strgp for diskstats, csv file: "./csv/disk/procdiskstats"
+ strgp_add name=store_disk plugin=store_csv container=disk schema=procdiskstats
+ strgp_prdcr_add name=store_disk regex=.*
+ strgp_start name=store_disk
+
+ # Updater that updates only groups
+ updtr_add name=u interval=1000000 offset=500000
+ updtr_match_add name=u regex=ldmsd_grp_schema match=schema
+ updtr_prdcr_add name=u regex=.*
+ updtr_start name=u
+
+Performing ``ldms_ls`` on the LDMS aggregator daemon exposes all the sets (including groups):
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 9000
+
+Output:
+
+.. code-block:: RST
+
+ s1/sdb
+ s1/grp
+ s1/eno1
+ s0/sda
+ s0/lo
+ s0/grp
+
+Performing ``ldms_ls -v`` against the aggregator, again querying only the group and its members:
+
+.. code-block:: RST
+
+ ldms_ls -h localhost -x sock -p 9000 -v s1/grp | grep consistent
+
+Output:
+
+.. code-block:: RST
+
+ s1/grp: consistent, last update: Mon May 20 15:42:34 2019 -0500 [891643us]
+ s1/sdb: consistent, last update: Mon May 20 16:38:38 2019 -0500 [1805us]
+ s1/eno1: consistent, last update: Mon May 20 16:38:38 2019 -0500 [1791us]
+
+
+The following is an example of the CSV output:
+
+.. code-block:: RST
+
+ > head csv/*/*
+
+.. code-block:: RST
+
+ ==> csv/disk/procdiskstats <==
+ #Time,Time_usec,ProducerName,component_id,job_id,app_id,reads_comp#sda,reads_comp.rate#sda,reads_merg#sda,reads_merg.rate#sda,sect_read#sda,sect_read.rate#sda,time_read#sda,time_read.rate#sda,writes_comp#sda,writes_comp.rate#sda,writes_merg#sda,writes_merg.rate#sda,sect_written#sda,sect_written.rate#sda,time_write#sda,time_write.rate#sda,ios_in_progress#sda,ios_in_progress.rate#sda,time_ios#sda,time_ios.rate#sda,weighted_time#sda,weighted_time.rate#sda,disk.byte_read#sda,disk.byte_read.rate#sda,disk.byte_written#sda,disk.byte_written.rate#sda
+ 1558387831.001731,1731,s0,0,0,0,197797,0,9132,0,5382606,0,69312,0,522561,0,446083,0,418086168,0,966856,0,0,0,213096,0,1036080,0,1327776668,0,1380408297,0
+ 1558387832.001943,1943,s1,0,0,0,108887,0,32214,0,1143802,0,439216,0,1,0,0,0,8,0,44,0,0,0,54012,0,439240,0,1309384656,0,1166016512,0
+ 1558387832.001923,1923,s0,0,0,0,197797,0,9132,0,5382606,0,69312,0,522561,0,446083,0,418086168,0,966856,0,0,0,213096,0,1036080,0,1327776668,0,1380408297,0
+ 1558387833.001968,1968,s1,0,0,0,108887,0,32214,0,1143802,0,439216,0,1,0,0,0,8,0,44,0,0,0,54012,0,439240,0,1309384656,0,1166016512,0
+ 1558387833.001955,1955,s0,0,0,0,197797,0,9132,0,5382606,0,69312,0,522561,0,446083,0,418086168,0,966856,0,0,0,213096,0,1036080,0,1327776668,0,1380408297,0
+ 1558387834.001144,1144,s1,0,0,0,108887,0,32214,0,1143802,0,439216,0,1,0,0,0,8,0,44,0,0,0,54012,0,439240,0,1309384656,0,1166016512,0
+ 1558387834.001121,1121,s0,0,0,0,197797,0,9132,0,5382606,0,69312,0,522561,0,446083,0,418086168,0,966856,0,0,0,213096,0,1036080,0,1327776668,0,1380408297,0
+ 1558387835.001179,1179,s0,0,0,0,197797,0,9132,0,5382606,0,69312,0,522561,0,446083,0,418086168,0,966856,0,0,0,213096,0,1036080,0,1327776668,0,1380408297,0
+ 1558387835.001193,1193,s1,0,0,0,108887,0,32214,0,1143802,0,439216,0,1,0,0,0,8,0,44,0,0,0,54012,0,439240,0,1309384656,0,1166016512,0
+
+ ==> csv/net/procnetdev <==
+ #Time,Time_usec,ProducerName,component_id,job_id,app_id,rx_bytes#lo,rx_packets#lo,rx_errs#lo,rx_drop#lo,rx_fifo#lo,rx_frame#lo,rx_compressed#lo,rx_multicast#lo,tx_bytes#lo,tx_packets#lo,tx_errs#lo,tx_drop#lo,tx_fifo#lo,tx_colls#lo,tx_carrier#lo,tx_compressed#lo
+ 1558387831.001798,1798,s0,0,0,0,12328527,100865,0,0,0,0,0,0,12328527,100865,0,0,0,0,0,0
+ 1558387832.001906,1906,s0,0,0,0,12342153,100925,0,0,0,0,0,0,12342153,100925,0,0,0,0,0,0
+ 1558387832.001929,1929,s1,0,0,0,3323644475,2865919,0,0,0,0,0,12898,342874081,1336419,0,0,0,0,0,0
+ 1558387833.002001,2001,s0,0,0,0,12346841,100939,0,0,0,0,0,0,12346841,100939,0,0,0,0,0,0
+ 1558387833.002025,2025,s1,0,0,0,3323644475,2865919,0,0,0,0,0,12898,342874081,1336419,0,0,0,0,0,0
+ 1558387834.001106,1106,s0,0,0,0,12349089,100953,0,0,0,0,0,0,12349089,100953,0,0,0,0,0,0
+ 1558387834.001130,1130,s1,0,0,0,3323647234,2865923,0,0,0,0,0,12898,342875727,1336423,0,0,0,0,0,0
+ 1558387835.001247,1247,s0,0,0,0,12351337,100967,0,0,0,0,0,0,12351337,100967,0,0,0,0,0,0
+ 1558387835.001274,1274,s1,0,0,0,3323647298,2865924,0,0,0,0,0,12898,342875727,1336423,0,0,0,0,0,0
+
+
diff --git a/rtd/docs/source/ldms-streams.rst b/rtd/docs/source/ldms-streams.rst
new file mode 100644
index 000000000..3f4659a05
--- /dev/null
+++ b/rtd/docs/source/ldms-streams.rst
@@ -0,0 +1,836 @@
+Streams-enabled Application Data Collectors
+###########################################
+
+Caliper
+***********************
+
+This section covers the basic steps on how to compile, build and use the caliperConnector.
+
+**What Is Caliper?**
+
+A program instrumentation and performance measurement framework that allows users to implement analysis capabilities (e.g. performance profiling, tracing, monitoring, and auto-tuning) into their applications using Caliper's annotation API.
+
+**What Is the caliperConnector?**
+
+A Caliper-LDMS functionality that utilizes LDMS Streams to collect Caliper-related data and absolute timestamps during runtime. It formats the data into a JSON message and *publishes* it to an LDMS streams interface.
+
+Setup & Configuration
+----------------------
+Build the Caliper program with the application you wish to analyze. No modifications to Caliper's instrumentation are required to integrate LDMS, so you just need to follow the build and install instructions from `Caliper's Build and Install Webpage `_
+
+Once built, you will need to point $LD_LIBRARY_PATH to Caliper's library:
+
+.. code-block:: RST
+
+ LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/lib64
+
+Now, to enable LDMS data collection, set (or export) the following list of caliper variables to ``ldms`` when executing a program. An example is shown below:
+
+.. code-block:: RST
+
+ export CALI_SERVICES_ENABLE=loop_monitor,mpi,ldms
+ CALI_LOOP_MONITOR_ITERATION_INTERVAL=10 ./caliper_example.o 400
+
+The ``CALI_LOOP_MONITOR_ITERATION_INTERVAL`` collects measurements every n loop iterations of the application, and ``CALI_SERVICES_ENABLE`` defines which services will be combined to collect the data.
+
+Once done, you will just need to execute your program and you will have application data collected by Caliper and LDMS.
+
+.. note::
+
+ The MPI service (i.e., mpi) is required when enabling LDMS because it is used for associating the MPI rank data collected by LDMS.
+
+LDMS Expected Output
+--------------------
+LDMS collects a set of runtime timeseries data of the application in parallel with Caliper. Below is an example of the collected data, formatted into a JSON string:
+
+.. code-block::
+
+ {"job_id":11878171,"ProducerName":“n1","rank":0,"timestamp":1670373198.056455,"region":"init","time":33.172237 }
+ {"job_id":11878171,"ProducerName":"n1","rank":0,"timestamp":1670373198.056455,"region":"initialization","time":33.211929 }
+ {"job_id":11878171,"ProducerName":“n1","rank":0,"timestamp":1670373198.056455,"region":"main","time":44.147736 }
+ {"job_id":11878171,"ProducerName":“n1","rank":0,"timestamp":1670373203.556555,"region":"main","time":0.049086 }
+ {"job_id":11878171,"ProducerName":“n1","rank":0,"timestamp":1670373203.556555,"region":"run","time":0.049086 }
+
+Any data collected by LDMS should have the same fields as the one shown above and can be viewed in a csv file **if** the LDMS csv_store plugin is configured in the LDMSD aggregator.
+
+.. note::
+ More information about starting and configuring an LDMS daemon to store to CSV can be found in `Run An LDMS Streams Daemon`_ or `LDMS Quickstart `_.
+
+
+
+Darshan
+***********************
+This section covers the basic steps of how to compile, build and use the Darshan-LDMS integration code (i.e. darshanConnector). The following application tests are part of the Darshan program and can be found under ``/darshan/darshan-test/regression/test-cases/src/``.
+
+**What Is Darshan?**
+
+A lightweight I/O characterization tool that transparently captures application I/O behavior from HPC applications with minimal overhead.
+
+**What Is The darshanConnector?**
+
+A Darshan-LDMS functionality that utilizes LDMS Streams to collect Darshan's original I/O tracing, Darshan's eXtended tracing (DXT) and absolute timestamps during runtime. It formats the data into a JSON message and *publishes* it to an LDMS streams interface. This data is a timeseries (i.e. an absolute timestamp is collected) that contains information about each individual I/O event.
+
+.. figure:: images/darshanConnector.png
+
+ The above diagram provides a high-level visualization of the darshanConnector. During Darshan initialization, the connector (on the left-hand side) checks whether Darshan has been built against the LDMS library; if it has, it initializes a connection to the LDMS streams daemon when DARSHAN_LDMS_ENABLE is set. Once initialized, the connector determines which module data to collect by checking which environment variables are set; for example, if MPI-IO_ENABLE_LDMS is set, that specific I/O event data will be collected. The runtime data collection and JSON message formatting are then performed in the darshan-ldms connector send function, which is triggered whenever an I/O event occurs. The data is published to the LDMS streams interface and sent through the LDMS transport to be stored in a database; the JSON-formatted message is shown at the very bottom left. Meanwhile, on the right, Darshan runs as usual: initializing its modules, collecting the I/O event data for those modules, aggregating and calculating the data, and then writing the information to a Darshan log file. The LDMS Streams implementation does not interfere with Darshan.
+
+.. note::
+
+ LDMS must already be installed on the system or locally. If it is not, then please follow ``Getting The Source`` and ``Building The Source`` in the `LDMS Quickstart Guide `_. If the Darshan-LDMS code is already deployed on your system, please skip to `Run An LDMS Streams Daemon`_
+
+**Metric Definitions**
+
+Below is the list of Darshan metrics currently collected by the darshanConnector:
+
+* ``schema:`` Schema name of the data collected by the darshanConnector. This is an LDMS related metric and is only used for storing the data to the correct location in DSOS.
+
+* ``module:`` Name of the Darshan module data being collected.
+
+* ``uid:`` User ID of the job run.
+
+* ``exe:`` Full path to the application executable. Only set to the full path when the "type" metric is set to "MET". Otherwise it is set to N/A.
+
+* ``ProducerName:`` Name of the compute node the application is running on.
+
+* ``switches:`` Number of times access alternated between read and write.
+
+* ``file:`` Path to the filename of the I/O operations. Only set to the full path when the "type" metric is set to "MET". Otherwise it is set to N/A.
+
+* ``rank:`` Rank of the process performing the I/O.
+
+* ``flushes:`` Number of times the "flush" operation was performed. For H5F and H5D it is the HDF5 file flush and dataset flush operation counts, respectively.
+
+* ``record_id:`` Darshan file record ID of the file the dataset belongs to.
+
+* ``max_byte:`` Highest offset byte read and written (i.e. Darshan's "\_MAX_BYTE_*" parameter).
+
+* ``type:`` The type of JSON data being published. It is either set to MOD for gathering "module" data or MET for gathering static "meta" data (i.e. record id, rank, etc.)
+
+* ``job_id:`` The Job ID of the application run.
+
+* ``op:`` Type of operation being performed (i.e. read, open, close, write).
+
+* ``cnt:`` The count of the operations ("op" field) performed per module per rank. Resets to 0 after each "close" operation.
+
+* ``seg:`` Contains the following array metrics from the operation ("op" field):
+
+ * ``pt_sel``: HDF5 number of different access selections.
+ * ``reg_hslab``: HDF5 number of regular hyperslabs.
+ * ``irreg_hslab``: HDF5 number of irregular hyperslabs.
+ * ``ndims``: HDF5 number of dimensions in dataset's dataspace.
+ * ``npoints``: HDF5 number of points in dataset's dataspace.
+ * ``off``: Cumulative total bytes read and cumulative total bytes written, respectively, for each module per rank (i.e. Darshan's "offset" DXT parameter).
+ * ``len``: Number of bytes read/written for the given operation per rank.
+ * ``start``: Start time (seconds) of each I/O operation performed for the given rank.
+ * ``dur``: Duration of each operation performed for the given rank (i.e. a rank takes "X" time to perform a r/w/o/c operation).
+ * ``total``: Cumulative time since the application started, measured after the I/O operation (i.e. start of application + dur).
+ * ``timestamp``: End time of the given operation ("op" field) for the given rank ("rank" field), in epoch time.
+
+For all metric fields that don't apply to a module, a value of ``-1`` is given.
+
+All data fields that do not change throughout the entire application run (i.e. constant), unless the darshanConnector is reconnected/restarted, are listed below:
+
+* ``ProducerName``
+* ``job_id``
+* ``schema``
+* ``exe``
+* ``uid``
+
+
+Compile and Build with LDMS
+---------------------------
+1. Run the following to build Darshan and link against an existing LDMS library on the system.
+
+.. code-block:: RST
+
+ git clone https://github.com/darshan-hpc/darshan.git
+ cd darshan && mkdir build/
+ ./prepare.sh && cd build/
+ ../configure CC= \
+ --with-log-path-by-env=LOGFILE_PATH_DARSHAN \
+ --prefix=/darshan/ \
+ --with-jobid-env= \
+ --enable-ldms-mod \
+ --with-ldms=
+ make && make install
+
+.. note::
+
+ * This configuration is specific to the system. The ``CC=`` value should be replaced by the compiler wrapper for your MPI library (e.g., ``mpicc`` for Open MPI, or ``cc`` for Cray Development Environment MPI wrappers).
+ * If running an MPI program, make sure an MPI library is installed/loaded on the system. For more information on how to install and build the code across various platforms, please visit `Darshan's Runtime Installation Page `_.
+ * ``--with-jobid-env=`` expects the name of the environment variable that the hosted job scheduler utilizes on the HPC system (e.g., Slurm would use ``--with-jobid-env=SLURM_JOB_ID``).
+
+2. **OPTIONAL** To build the HDF5 module for Darshan, you must first load the HDF5 modulefile with ``module load hdf5-parallel``, then run configure as follows:
+
+.. code-block:: RST
+
+ ../configure CC= \
+ --with-log-path-by-env=LOGFILE_PATH_DARSHAN \
+ --prefix=/darshan/ \
+ --with-jobid-env= \
+ --enable-ldms-mod \
+ --with-ldms= \
+ --enable-hdf5-mod \
+ --with-hdf5=
+ make && make install
+
+2a. **OPTIONAL** If you do not have HDF5 installed on your system, you may install the system HDF5 packages and build Python's ``h5py`` package against them:
+
+.. code-block:: RST
+
+ sudo apt-get install -y hdf5-tools libhdf5-openmpi-dev openmpi-bin
+ # we need to build h5py with the system HDF5 lib backend
+ export HDF5_MPI="ON"
+ CC=cc python -m pip install --no-binary=h5py h5py
+
+.. note::
+
+ If the HDF5 library is installed this way, you do not need to include the ``--with-hdf5`` flag during configuration. For more information on other methods and HDF5 versions to install, please visit `Darshan's Runtime Installation Page `_.
+
+
+Run an LDMS Streams Daemon
+---------------------------
+This section will go over how to start and configure a simple LDMS streams daemon to collect the Darshan data and store it to a CSV file.
+If an LDMS streams daemon is already running on the system then please skip to `Test the Darshan-LDMS Integrated Code (Multi Node)`_.
+
+1. First, initialize an ldms streams daemon on a compute node as follows:
+
+.. code-block:: RST
+
+ salloc -N 1 --time=2:00:00 -p
+ *ssh to node*
+
+2. Once on the compute node (interactive session), set up the environment for starting an LDMS daemon:
+
+.. code-block:: RST
+
+ LDMS_INSTALL=
+ export LD_LIBRARY_PATH="$LDMS_INSTALL/lib/:$LDMS_INSTALL/lib:$LD_LIBRARY_PATH"
+ export LDMSD_PLUGIN_LIBPATH="$LDMS_INSTALL/lib/ovis-ldms/"
+ export ZAP_LIBPATH="$LDMS_INSTALL/lib/ovis-ldms"
+ export PATH="$LDMS_INSTALL/sbin:$LDMS_INSTALL/bin:$PATH"
+ export PYTHONPATH=
+ export COMPONENT_ID="1"
+ export SAMPLE_INTERVAL="1000000"
+ export SAMPLE_OFFSET="0"
+ export HOSTNAME="localhost"
+
+.. note::
+
+ LDMS must already be installed on the system or locally. If it is not, then please follow ``Getting The Source`` and ``Building The Source`` in the `LDMS Quickstart Guide `_.
+
+3. Next, create a file called **"darshan\_stream\_store.conf"** and add the following content to it:
+
+.. code-block:: RST
+
+ load name=hello_sampler
+ config name=hello_sampler producer=${HOSTNAME} instance=${HOSTNAME}/hello_sampler stream=darshanConnector component_id=${COMPONENT_ID}
+ start name=hello_sampler interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}
+
+ load name=stream_csv_store
+ config name=stream_csv_store path=./streams/store container=csv stream=darshanConnector rolltype=3 rollover=500000
+
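+With this configuration, the stream store writes its CSV output under the
+``path``/``container`` directories given above (here ``./streams/store/csv/``,
+relative to the daemon's working directory; the exact file naming depends on
+the plugin's rollover settings). After a run you might check, for example:
+
+.. code-block:: RST
+
+ ls ./streams/store/csv/
+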
+4. Next, run the LDMS streams daemon with the following command:
+
+.. code-block:: RST
+
+ ldmsd -x sock:10444 -c darshan_stream_store.conf -l /tmp/darshan_stream_store.log -v DEBUG -r ldmsd.pid
+
+.. note::
+
+ To check that the ldmsd daemon is up and running, run ``ps auwx | grep ldmsd | grep -v grep``, ``ldms_ls -h HOST -x sock -p PORT -a none -v`` or ``cat /tmp/darshan_stream_store.log``, where HOST is the node the LDMS daemon is running on and PORT is the port number it is listening on.
+
+Test the Darshan-LDMS Integrated Code (Multi Node)
+--------------------------------------------------
+This section gives step by step instructions on how to test the Darshan-LDMS Integrated code (i.e. darshanConnector) by executing a simple test application provided by Darshan.
+
+Set The Environment
+////////////////////
+1. Once the LDMS streams daemon is initialized, **open another terminal window (login node)** and set the following environment variables before running an application test with Darshan:
+
+.. code-block:: RST
+
+ export DARSHAN_INSTALL_PATH=
+ export LD_PRELOAD=$DARSHAN_INSTALL_PATH/lib/libdarshan.so
+ export LD_LIBRARY_PATH=$DARSHAN_INSTALL_PATH/lib:$LD_LIBRARY_PATH
+ # optional. Please visit Darshan's webpage for more information.
+ export DARSHAN_MOD_ENABLE="DXT_POSIX,DXT_MPIIO"
+
+ # uncomment if hdf5 is enabled
+ #export C_INCLUDE_PATH=$C_INCLUDE_PATH:/usr/include/hdf5/openmpi
+ #export HDF5_LIB=/lib/libhdf5.so
+
+ #set env variables for ldms streams daemon testing
+ export DARSHAN_LDMS_STREAM=darshanConnector
+ export DARSHAN_LDMS_XPRT=sock
+ export DARSHAN_LDMS_HOST=
+ export DARSHAN_LDMS_PORT=10444
+ export DARSHAN_LDMS_AUTH=none
+
+ # enable LDMS data collection. No runtime data collection will occur if this is not exported.
+ export DARSHAN_LDMS_ENABLE=
+
+ # determine which modules we want to publish to ldmsd
+ #export DARSHAN_LDMS_ENABLE_MPIIO=
+ #export DARSHAN_LDMS_ENABLE_POSIX=
+ #export DARSHAN_LDMS_ENABLE_STDIO=
+ #export DARSHAN_LDMS_ENABLE_HDF5=
+ #export DARSHAN_LDMS_ENABLE_ALL=
+ #export DARSHAN_LDMS_VERBOSE=
+
+.. note::
+
+ ``DARSHAN_LDMS_HOST`` is set to the name of the node the LDMS streams daemon is running on (e.g. the node we previously ssh'd into). Make sure the ``LD_PRELOAD`` and at least one of the ``DARSHAN_LDMS_ENABLE_*`` variables are set. If not, no data will be collected by LDMS.
+
+.. note::
+
+ ``DARSHAN_LDMS_VERBOSE`` outputs the JSON formatted messages sent to the LDMS streams daemon. The output will be sent to STDERR.
+
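+Before launching the application, you can optionally verify that the streams
+daemon is reachable using the connection settings just exported (this simply
+lists the daemon's sets):
+
+.. code-block:: RST
+
+ ldms_ls -h $DARSHAN_LDMS_HOST -x $DARSHAN_LDMS_XPRT -p $DARSHAN_LDMS_PORT -a none
+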
+Execute Test Application
+/////////////////////////
+Now we will test the darshanConnector with Darshan's example ``mpi-io-test.c`` code by setting the following environment variables:
+
+.. code-block:: RST
+
+ export PROG=mpi-io-test
+ export DARSHAN_TMP=/tmp/darshan-ldms-test
+ export DARSHAN_TESTDIR=/darshan/darshan-test/regression
+ export DARSHAN_LOGFILE_PATH=$DARSHAN_TMP
+
+Now ``cd`` to the test directory, compile the application, and run it with the darshanConnector enabled.
+
+.. code-block:: RST
+
+ cd darshan/darshan-test/regression/test-cases/src
+ # compile with your MPI compiler wrapper (mpicc shown here)
+ mpicc $DARSHAN_TESTDIR/test-cases/src/${PROG}.c -o $DARSHAN_TMP/${PROG}
+ cd $DARSHAN_TMP
+ srun ${PROG} -f $DARSHAN_TMP/${PROG}.tmp.dat
+
+Once the application is complete, to view the data please skip to `Check Results`_.
+
+Test the Darshan-LDMS Integrated Code (Single Node)
+---------------------------------------------------
+This section gives step-by-step instructions on how to compile and execute the ``mpi-io-test.c`` program under ``darshan/darshan-test/regression/test-cases/src/``, collect the data with the LDMS streams daemon, and store it to a CSV file on a single login node. This section is for those who will not be running their applications on a cluster (i.e. no compute nodes).
+
+1. Set Environment Variables for Darshan, LDMS and Darshan-LDMS Integrated code (i.e. darshanConnector).
+
+.. code-block:: RST
+
+ # Darshan
+ export DARSHAN_INSTALL_PATH=
+ export LD_PRELOAD=$DARSHAN_INSTALL_PATH/lib/libdarshan.so
+ export LD_LIBRARY_PATH=$DARSHAN_INSTALL_PATH/lib:$LD_LIBRARY_PATH
+ # Optional. Please visit Darshan's runtime webpage for more information.
+ #export DARSHAN_MOD_ENABLE="DXT_POSIX,DXT_MPIIO"
+
+ # uncomment if hdf5 is enabled
+ #export C_INCLUDE_PATH=$C_INCLUDE_PATH:/usr/include/hdf5/openmpi
+ #export HDF5_LIB=/libhdf5.so
+
+ # LDMS
+
+ LDMS_INSTALL=
+ export LD_LIBRARY_PATH="$LDMS_INSTALL/lib/:$LDMS_INSTALL/lib:$LD_LIBRARY_PATH"
+ export LDMSD_PLUGIN_LIBPATH="$LDMS_INSTALL/lib/ovis-ldms/"
+ export ZAP_LIBPATH="$LDMS_INSTALL/lib/ovis-ldms"
+ export PATH="$LDMS_INSTALL/sbin:$LDMS_INSTALL/bin:$PATH"
+ export PYTHONPATH=
+ export COMPONENT_ID="1"
+ export SAMPLE_INTERVAL="1000000"
+ export SAMPLE_OFFSET="0"
+ export HOSTNAME="localhost"
+
+ # darshanConnector
+ export DARSHAN_LDMS_STREAM=darshanConnector
+ export DARSHAN_LDMS_XPRT=sock
+ export DARSHAN_LDMS_HOST=
+ export DARSHAN_LDMS_PORT=10444
+ export DARSHAN_LDMS_AUTH=none
+
+ # enable LDMS data collection. No runtime data collection will occur if this is not exported.
+ export DARSHAN_LDMS_ENABLE=
+
+ # determine which modules we want to publish to ldmsd
+ #export DARSHAN_LDMS_ENABLE_MPIIO=
+ #export DARSHAN_LDMS_ENABLE_POSIX=
+ #export DARSHAN_LDMS_ENABLE_STDIO=
+ #export DARSHAN_LDMS_ENABLE_HDF5=
+ #export DARSHAN_LDMS_ENABLE_ALL=
+ #export DARSHAN_LDMS_VERBOSE=
+
+.. note::
+
+ ``DARSHAN_LDMS_VERBOSE`` outputs the JSON formatted messages sent to the LDMS streams daemon. The output will be sent to STDERR.
+
+2. Generate the LDMSD Configuration File and Start the Daemon
+
+.. code-block:: RST
+
+ cat > darshan_stream_store.conf << EOF
+ load name=hello_sampler
+ config name=hello_sampler producer=${HOSTNAME} instance=${HOSTNAME}/hello_sampler stream=darshanConnector component_id=${COMPONENT_ID}
+ start name=hello_sampler interval=${SAMPLE_INTERVAL} offset=${SAMPLE_OFFSET}
+
+ load name=stream_csv_store
+ config name=stream_csv_store path=./streams/store container=csv stream=darshanConnector rolltype=3 rollover=500000
+ EOF
+
+ ldmsd -x sock:10444 -c darshan_stream_store.conf -l /tmp/darshan_stream_store.log -v DEBUG
+ # check daemon is running
+ ldms_ls -p 10444 -h localhost -v
+
+3. Set Up Test Case Variables
+
+.. code-block:: RST
+
+ export PROG=mpi-io-test
+ export DARSHAN_TMP=/tmp/darshan-ldms-test
+ export DARSHAN_TESTDIR=/darshan/darshan-test/regression
+ export DARSHAN_LOGFILE_PATH=$DARSHAN_TMP
+
+4. Run Darshan's mpi-io-test.c program
+
+.. code-block:: RST
+
+ cd darshan/darshan-test/regression/test-cases/src
+ # compile with your MPI compiler wrapper (mpicc shown here)
+ mpicc $DARSHAN_TESTDIR/test-cases/src/${PROG}.c -o $DARSHAN_TMP/${PROG}
+ cd $DARSHAN_TMP
+ ./${PROG} -f $DARSHAN_TMP/${PROG}.tmp.dat
+
+Once the application is complete, to view the data please skip to `Check Results`_.
+
+Pre-Installed Darshan-LDMS
+---------------------------
+If both the Darshan-LDMS integrated code (i.e., darshanConnector) and LDMS are already installed, and a system LDMS streams daemon is running, then there are two ways to enable the LDMS functionality:
+
+1. Set the environment via sourcing the ``darshan_ldms.env`` script
+
+2. Load the Darshan-LDMS module via ``module load darshan_ldms``
+
+.. note::
+
+ Only when executing an application or submitting a job does the user need to load the ``darshan_ldms`` modulefile or source the ``darshan_ldms.env`` script. Compiling, building, or installing the application does not affect the darshanConnector and vice versa.
+
+1. Set Environment
+///////////////////
+
+In order to enable the darshanConnector code on the system, just source the following env script:
+
+.. code-block:: RST
+
+ module use /projects/ovis/modules/
+ source /projects/ovis/modules//darshan_ldms.env
+
+**OPTIONAL**: Add a "-v" when sourcing this file to enable verbose:
+
+.. code-block:: RST
+
+ $ source /projects/ovis/modules//darshan_ldms.env -v
+
+This will output the JSON messages collected by LDMS to the terminal window.
+
+.. note::
+
+ The STDIO data will NOT be collected by LDMS. This is to prevent any recursive LDMS function calls.
+
+2. Load Module
+///////////////
+
+If you do not wish to set the environment using the env script from above, you can always load the ``darshan_ldms`` modulefile, as follows:
+
+.. code-block:: RST
+
+ module use /projects/ovis/modules/
+ module load darshan_ldms
+
+**OPTIONAL**: If you decide to load the module, you will need to turn on verbose by setting the following environment variable in your run script:
+
+.. code-block:: RST
+
+ export DARSHAN_LDMS_VERBOSE="true"
+
+Script Information
+///////////////////
+
+The ``darshan_ldms`` module and ``.env`` file set the following environment variables, which define where the Darshan install is located, how to connect to the LDMS daemon, and what kind of file-level access data will be published and stored to DSOS (via LDMS streams).
+
+If you only want to collect a specific type of data, such as "MPIIO", then set only the ``DARSHAN_LDMS_ENABLE_MPIIO`` variable:
+
+.. code-block:: RST
+
+ export DARSHAN_LDMS_ENABLE_MPIIO=""
+
+If you want to collect all types of data, then set all of the ``DARSHAN_LDMS_ENABLE_*`` variables:
+
+.. code-block:: RST
+
+ export DARSHAN_LDMS_ENABLE_MPIIO=""
+ export DARSHAN_LDMS_ENABLE_POSIX=""
+ export DARSHAN_LDMS_ENABLE_HDF5=""
+
+.. note::
+
+ All Darshan binary log-files (i.e. .darshan) will be saved to ``$LOGFILE_PATH_DARSHAN``, as specified at build time and exported in the user environment.
+
+.. code-block:: RST
+
+ # Set variables for darshan install
+ export LD_PRELOAD=$LD_PRELOAD:$DARSHAN_INSTALL_PATH/lib/libdarshan.so
+ export PATH=$PATH:$DARSHAN_INSTALL_PATH/bin
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DARSHAN_INSTALL_PATH/lib
+ export LIBRARY_PATH=$LIBRARY_PATH:$DARSHAN_INSTALL_PATH/lib
+
+ export DARSHAN_RUNTIME_DIR=$DARSHAN_INSTALL_PATH
+ export DARSHAN_RUNTIME_BIN=$DARSHAN_INSTALL_PATH/bin
+ export DARSHAN_RUNTIME_LIB=$DARSHAN_INSTALL_PATH/lib
+ export HDF5_USE_FILE_LOCKING=1
+
+ # Set logfile path
+ export DARSHAN_TMP=/projects/ovis/darshanConnector//darshan/build/logs/
+ export LOGFILE_PATH_DARSHAN=$DARSHAN_TMP
+
+ # Connect to ldms daemon
+ export DARSHAN_LDMS_STREAM=darshanConnector
+ export DARSHAN_LDMS_PORT=412
+ export DARSHAN_LDMS_HOST=localhost
+ export DARSHAN_LDMS_XPRT=sock
+ export DARSHAN_LDMS_AUTH=munge
+
+ # Specify type of data to collect
+ export DARSHAN_LDMS_ENABLE=
+ export DARSHAN_LDMS_ENABLE_MPIIO=
+ export DARSHAN_LDMS_ENABLE_POSIX=
+ export DARSHAN_LDMS_ENABLE_STDIO=
+ export DARSHAN_LDMS_ENABLE_HDF5=
+ #export DARSHAN_LDMS_ENABLE_ALL=
+ #export DARSHAN_LDMS_VERBOSE=
+
+ # check if verbose is requested
+ if [ "$1" == "-v" ]; then
+ export DARSHAN_LDMS_VERBOSE=
+ echo "Verbose is set."
+ else
+ unset DARSHAN_LDMS_VERBOSE
+ fi
+
+
+Run application
+///////////////
+Once the module is loaded and the environment is set, you just need to run your application. All Darshan-related logs will automatically be saved in the directory specified by ``$LOGFILE_PATH_DARSHAN``.
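+
+For example, a minimal job script might look like the following (a hedged sketch; the module path mirrors the one above, while the application name, node count and launcher are hypothetical):
+
+.. code-block:: RST
+
+   #!/bin/bash
+   # Enable the darshanConnector for this job
+   module use /projects/ovis/modules/
+   module load darshan_ldms
+
+   # Launch the application; the module has already set LD_PRELOAD
+   srun -N 4 ./my_app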
+
+.. note::
+
+   If runtime errors or issues occur, they are most likely due to incompatibilities between the application build and the Darshan-LDMS build loaded via ``LD_PRELOAD``. You may debug the issue as follows:
+
+ 1. Unset the ``LD_PRELOAD`` environment variable (e.g., ``unset LD_PRELOAD``), then run the application with: ``mpiexec -env LD_PRELOAD $DARSHAN_INSTALL_PATH/lib/libdarshan.so`` or ``srun --export=LD_PRELOAD=$DARSHAN_INSTALL_PATH/lib/libdarshan.so``.
+ For more information please see section 5.2 in `Darshan's Runtime Installation Page `_.
+
+ 2. If you are still running into runtime issues, please send an email to ldms@sandia.gov and provide:
+ a) mpi-io, hdf5, pnetcdf, compiler version (if applicable) used to build your application
+ b) Contents of your environment variables: $PATH, $LIBRARY_PATH, $LD_LIBRARY_PATH and $LD_PRELOAD.
+
+
+Check Results
+-------------
+LDMS Output
+////////////
+This section shows the expected output of an application run whose data is published to an LDMS streams daemon configured with a CSV storage plugin (see section `Run An LDMS Streams Daemon`_).
+
+* If you are publishing to a Local Streams Daemon (compute or login nodes) to collect the Darshan data, then compare the generated ``csv`` file to the one shown below in this section.
+
+* If you are publishing to a System Daemon that aggregates the data and stores it in a Scalable Object Store (SOS), please skip this section and go to the :doc:`SOS Quickstart Guide ` for more information about viewing and accessing data from this database.
+
+LDMS Log File
+/////////////
+* Once the application has completed, run ``cat /tmp/hello_stream_store.log`` in the terminal window where the ldmsd is running (compute node). You should see output similar to the example below.
+
+.. code-block:: RST
+
+ cat /tmp/hello_stream_store.log
+ Fri Feb 18 11:35:23 2022: INFO : stream_type: JSON, msg: "{ "job_id":53023,"rank":3,"ProducerName":"nid00052","file":"darshan-output/mpi-io-test.tmp.dat","record_id":1601543006480890062,"module":"POSIX","type":"MET","max_byte":-1,"switches":-1,"flushes":-1,"cnt":1,"op":"opens_segment","seg":[{"data_set":"N/A","pt_sel":-1,"irreg_hslab":-1,"reg_hslab":-1,"ndims":-1,"npoints":-1,"off":-1,"len":-1,"dur":0.00,"timestamp":1645209323.082951}]}", msg_len: 401, entity: 0x155544084aa0
+ Fri Feb 18 11:35:23 2022: INFO : stream_type: JSON, msg: "{ "job_id":53023,"rank":3,"ProducerName":"nid00052","file":"N/A","record_id":1601543006480890062,"module":"POSIX","type":"MOD","max_byte":-1,"switches":-1,"flushes":-1,"cnt":1,"op":"closes_segment","seg":[{"data_set":"N/A","pt_sel":-1,"irreg_hslab":-1,"reg_hslab":-1,"ndims":-1,"npoints":-1,"off":-1,"len":-1,"dur":0.00,"timestamp":1645209323.083581}]}", msg_len: 353, entity: 0x155544083f60
+ ...
+
+CSV File
+////////
+* To view the data stored in the CSV file generated by the streams store plugin, first kill the ldmsd daemon by running ``killall ldmsd``.
+* Then ``cat`` the generated CSV file. Below is the stored DXT module data from LDMS's ``stream_csv_store`` plugin for the ``mpi-io-test-dxt.sh`` test case.
+
+.. code-block:: RST
+
+ #module,uid,ProducerName,switches,file,rank,flushes,record_id,exe,max_byte,type,job_id,op,cnt,seg:off,seg:pt_sel,seg:dur,seg:len,seg:ndims,seg:reg_hslab,seg:irreg_hslab,seg:data_set,seg:npoints,seg:timestamp,seg:total,seg:start
+ POSIX,99066,n9,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,278,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.007415,-1,-1,-1,-1,N/A,-1,1662576527,0.007415,0.298313
+ MPIIO,99066,n9,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,278,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.100397,-1,-1,-1,-1,N/A,-1,1662576527,0.100397,0.209427
+ POSIX,99066,n11,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,339,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.00742,-1,-1,-1,-1,N/A,-1,1662576527,0.00742,0.297529
+ POSIX,99066,n6,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,184,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.007375,-1,-1,-1,-1,N/A,-1,1662576527,0.007375,0.295111
+ POSIX,99066,n14,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,437,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.007418,-1,-1,-1,-1,N/A,-1,1662576527,0.007418,0.296812
+ POSIX,99066,n7,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,192,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.007435,-1,-1,-1,-1,N/A,-1,1662576527,0.007435,0.294776
+ MPIIO,99066,n7,-1,/lustre//darshan-ldms-output/mpi-io-test_lC.tmp.out,192,-1,9.22337E+18,/lustre//darshan-ldms-output/mpi-io-test,-1,MET,10697754,open,1,-1,-1,0.033042,-1,-1,-1,-1,N/A,-1,1662576527,0.033042,0.273251
+ ...
+
+Compare With Darshan Log File(s)
+////////////////////////////////
+Parse the Darshan binary file using Darshan's standard parser and, if the ``DXT Module`` is enabled, the DXT parser.
+
+.. code-block:: RST
+
+ $DARSHAN_INSTALL_PATH/bin/darshan-parser --all $LOGFILE_PATH_DARSHAN/.darshan > $DARSHAN_TMP/${PROG}.darshan.txt
+ $DARSHAN_INSTALL_PATH/bin/darshan-dxt-parser --show-incomplete $LOGFILE_PATH_DARSHAN/.darshan > $DARSHAN_TMP/${PROG}-dxt.darshan.txt
+
+Now you can view the log(s) with ``cat $DARSHAN_TMP/${PROG}.darshan.txt`` or ``cat $DARSHAN_TMP/${PROG}-dxt.darshan.txt`` and compare them to the data collected by LDMS.
+
+The ``ProducerName``, file path and record_id of each job should match and, if ``dxt`` was enabled, so should the individual I/O statistics of each rank (i.e., start time and number of I/O operations).
+
+
+Kokkos
+***********************
+**What Is Kokkos?**
+
+Kokkos is a C++ parallel programming ecosystem for performance portability across multi-core, many-core, and GPU node architectures. It provides abstractions for the parallel execution of code and for data management.
+
+* Appropriate Kokkos function calls must be included in the application code. Add the environment variables listed in the next section to your run script to push Kokkos data from the application to an LDMS stream for collection.
+
+Setup and Configuration
+-------------------------
+**The KokkosConnector**
+
+The kokkosConnector is a Kokkos-LDMS connector that utilizes LDMS Streams to collect Kokkos-related data during runtime. The Kokkos sampler, provided by the Kokkos-tools library, controls the sampling rate and provides the option to sample data using a count-based push. It then formats the data into a JSON message and *publishes* it to an LDMS streams interface.
+
+.. warning::
+ To use kokkosConnector, all users will need to install Kokkos-Tools. You can find their repository and instructions on installing it here: https://github.com/kokkos/kokkos-tools
+
+
+The following environment variables are needed in an application's run script to run the kokkos-sampler and LDMS's kokkosConnector:
+
+.. code-block:: RST
+
+ export KOKKOS_LDMS_HOST="localhost"
+ export KOKKOS_LDMS_PORT="412"
+ export KOKKOS_PROFILE_LIBRARY="/kokkos-tools/common/kokkos_sampler/kp_sampler.so;/ovis/kokkosConnector/kp_kernel_ldms.so"
+ export KOKKOS_SAMPLER_RATE=101
+ export KOKKOS_LDMS_VERBOSE=0
+ export KOKKOS_LDMS_AUTH="munge"
+ export KOKKOS_LDMS_XPRT="sock"
+
+* The KOKKOS_SAMPLER_RATE variable determines the rate of messages pushed to streams and collected. Note that it is best practice to set this to a prime number to avoid repeatedly sampling the same kernels.
+* The KOKKOS_LDMS_VERBOSE variable can be set to 1 for debugging purposes, which prints all collected kernel data to the console.
+
+How To Make A Data Connector
+*****************************
+In order to create a data connector with LDMS to collect runtime timeseries application data, you will need to utilize LDMS's Streams Functionality. This section provides the necessary functions and Streams API calls required to make the data connector.
+
+The example (code) below is pulled from the Darshan-LDMS Integration code.
+
+.. note::
+
+ The LDMS Streams functionality uses a push-based method to reduce memory consumed and data loss on the node.
+
+Include the following LDMS files
+---------------------------------------
+* First, the following libraries will need to be included in the program, as these contain all the functions that the data connector will be using/calling. (The header names were stripped during formatting; the three below are the headers the Darshan-LDMS integration is expected to use.)
+
+.. code-block:: RST
+
+   #include <ldms/ldms.h>
+   #include <ldms/ldmsd_stream.h>
+   #include <ovis_json/ovis_json.h>
+
+Initialize All Necessary Variables
+-----------------------------------
+
+* Next, the following variables will need to be initialized globally or accessible by the Streams API Functions described in the next section:
+
+.. code-block:: RST
+
+ #define SLURM_NOTIFY_TIMEOUT 5
+ ldms_t ldms_g;
+ pthread_mutex_t ln_lock;
+ int conn_status, to;
+ ldms_t ldms_darsh;
+ sem_t conn_sem;
+ sem_t recv_sem;
+
+
+Copy "Hello Sampler" Streams API Functions
+------------------------------------------
+Next, copy the ``ldms_t setup_connection`` and ``static void event_cb`` functions listed below. These functions originated from the `ldmsd_stream_subscribe.c `_ code.
+
+The ``setup_connection`` function contains the LDMS API calls that connect to the LDMS daemon, and ``static void event_cb`` is a callback function that checks the connection status of the LDMS daemon.
+
+.. code-block:: RST
+
+ static void event_cb(ldms_t x, ldms_xprt_event_t e, void *cb_arg)
+ {
+ switch (e->type) {
+ case LDMS_XPRT_EVENT_CONNECTED:
+ sem_post(&conn_sem);
+ conn_status = 0;
+ break;
+ case LDMS_XPRT_EVENT_REJECTED:
+ ldms_xprt_put(x);
+ conn_status = ECONNREFUSED;
+ break;
+ case LDMS_XPRT_EVENT_DISCONNECTED:
+ ldms_xprt_put(x);
+ conn_status = ENOTCONN;
+ break;
+ case LDMS_XPRT_EVENT_ERROR:
+ conn_status = ECONNREFUSED;
+ break;
+ case LDMS_XPRT_EVENT_RECV:
+ sem_post(&recv_sem);
+ break;
+ case LDMS_XPRT_EVENT_SEND_COMPLETE:
+ break;
+ default:
+ printf("Received invalid event type %d\n", e->type);
+ }
+ }
+
+ ldms_t setup_connection(const char *xprt, const char *host,
+ const char *port, const char *auth)
+ {
+ char hostname[PATH_MAX];
+ const char *timeout = "5";
+ int rc;
+ struct timespec ts;
+
+ if (!host) {
+ if (0 == gethostname(hostname, sizeof(hostname)))
+ host = hostname;
+ }
+ if (!timeout) {
+ ts.tv_sec = time(NULL) + 5;
+ ts.tv_nsec = 0;
+ } else {
+ int to = atoi(timeout);
+ if (to <= 0)
+ to = 5;
+ ts.tv_sec = time(NULL) + to;
+ ts.tv_nsec = 0;
+ }
+
+ ldms_g = ldms_xprt_new_with_auth(xprt, auth, NULL);
+ if (!ldms_g) {
+ printf("Error %d creating the '%s' transport\n",
+ errno, xprt);
+ return NULL;
+ }
+
+ sem_init(&recv_sem, 1, 0);
+ sem_init(&conn_sem, 1, 0);
+
+ rc = ldms_xprt_connect_by_name(ldms_g, host, port, event_cb, NULL);
+ if (rc) {
+ printf("Error %d connecting to %s:%s\n",
+ rc, host, port);
+ return NULL;
+ }
+ sem_timedwait(&conn_sem, &ts);
+ if (conn_status)
+ return NULL;
+ return ldms_g;
+ }
+
+Initialize and Connect to LDMSD
+------------------------------------------
+Once the above functions have been copied, ``setup_connection`` will need to be called in order to establish a connection to an LDMS Streams daemon.
+
+.. note::
+
+ The LDMS Daemon is configured with the `Streams Plugin `_ and should already be running on the node. The host is set to the node the daemon is running on and port is set to the port the daemon is listening to. Below you will find an example of the Darshan Connector for reference.
+
+.. code-block:: RST
+
+   void darshan_ldms_connector_initialize()
+   {
+       const char* env_ldms_stream = getenv("DARSHAN_LDMS_STREAM");
+       const char* env_ldms_xprt = getenv("DARSHAN_LDMS_XPRT");
+       const char* env_ldms_host = getenv("DARSHAN_LDMS_HOST");
+       const char* env_ldms_port = getenv("DARSHAN_LDMS_PORT");
+       const char* env_ldms_auth = getenv("DARSHAN_LDMS_AUTH");
+
+       /* Make sure the stream, transport, host, port and authentication are all set */
+       if (!env_ldms_xprt || !env_ldms_host || !env_ldms_port || !env_ldms_auth || !env_ldms_stream){
+           printf("Either the stream, transport, host, port or authentication is not given\n");
+           return;
+       }
+
+       pthread_mutex_lock(&ln_lock);
+       ldms_darsh = setup_connection(env_ldms_xprt, env_ldms_host, env_ldms_port, env_ldms_auth);
+       if (conn_status != 0) {
+           printf("Error setting up connection to LDMS streams daemon: %i -- exiting\n", conn_status);
+           pthread_mutex_unlock(&ln_lock);
+           return;
+       }
+       else if (ldms_darsh->disconnected){
+           printf("Disconnected from LDMS streams daemon -- exiting\n");
+           pthread_mutex_unlock(&ln_lock);
+           return;
+       }
+       pthread_mutex_unlock(&ln_lock);
+       return;
+   }
+
+The ``DARSHAN_LDMS_*`` environment variables define the stream name (as configured in the daemon), transport type (sock, ugni, etc.), host, port and authentication of the LDMSD. In this specific example, the stream name is set to "darshanConnector", so the environment variable ``DARSHAN_LDMS_STREAM`` is exported as follows: ``export DARSHAN_LDMS_STREAM=darshanConnector``
+
+.. note::
+   The environment variables are not required. The stream, transport, host, port and authentication can instead be initialized and set within the code.
+
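+For example, a minimal sketch of setting the connection within the code (the values mirror the environment script shown earlier and are illustrative):
+
+.. code-block:: RST
+
+   /* Hard-code the stream connection settings instead of
+      reading them from the environment (values illustrative). */
+   ldms_darsh = setup_connection("sock", "localhost", "412", "munge");
+   if (!ldms_darsh)
+       printf("Error connecting to the LDMS streams daemon\n");
+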
+.. note::
+   If you run into the following error: ``error:unknown type name 'sem_t'`` then you will need to add the following headers to your code (``sem_t`` comes from the POSIX semaphore header, and the mutex calls from pthreads):
+
+   * ``#include <semaphore.h>``
+   * ``#include <pthread.h>``
+
+Publish Event Data to LDMSD
+-------------------------------------
+Now we will create a function that collects all relevant application events and publishes them to the LDMS Streams daemon. In the Darshan-LDMS Integration, Darshan's I/O traces for each I/O event (i.e. open, close, read, write) are collected, along with the absolute timestamp of each I/O event (for timeseries data):
+
+.. code-block:: RST
+
+ void darshan_ldms_connector_send(int64_t record_count, char *rwo, int64_t offset, int64_t length, int64_t max_byte, int64_t rw_switch, int64_t flushes, double start_time, double end_time, struct timespec tspec_start, struct timespec tspec_end, double total_time, char *mod_name, char *data_type)
+ {
+ char jb11[1024];
+ int rc, ret, i, size, exists;
+     const char* env_ldms_stream = getenv("DARSHAN_LDMS_STREAM");
+
+     pthread_mutex_lock(&ln_lock);
+ if (ldms_darsh != NULL)
+ exists = 1;
+ else
+ exists = 0;
+     pthread_mutex_unlock(&ln_lock);
+
+ if (!exists){
+ return;
+ }
+
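+     /* Format the event into a JSON message. NOTE: dC (a connector state
+        struct) and micro_s (the microsecond part of tspec_end) are defined
+        elsewhere in the darshanConnector code. */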
+ sprintf(jb11,"{ \"uid\":%ld, \"exe\":\"%s\",\"job_id\":%ld,\"rank\":%ld,\"ProducerName\":\"%s\",\"file\":\"%s\",\"record_id\":%"PRIu64",\"module\":\"%s\",\"type\":\"%s\",\"max_byte\":%ld,\"switches\":%ld,\"flushes\":%ld,\"cnt\":%ld,\"op\":\"%s\",\"seg\":[{\"data_set\":\"%s\",\"pt_sel\":%ld,\"irreg_hslab\":%ld,\"reg_hslab\":%ld,\"ndims\":%ld,\"npoints\":%ld,\"off\":%ld,\"len\":%ld,\"start\":%0.6f,\"dur\":%0.6f,\"total\":%.6f,\"timestamp\":%lu.%.6lu}]}", dC.uid, dC.exename, dC.jobid, dC.rank, dC.hname, dC.filename, dC.record_id, mod_name, data_type, max_byte, rw_switch, flushes, record_count, rwo, dC.data_set, dC.hdf5_data[0], dC.hdf5_data[1], dC.hdf5_data[2], dC.hdf5_data[3], dC.hdf5_data[4], offset, length, start_time, end_time-start_time, total_time, tspec_end.tv_sec, micro_s);
+
+ rc = ldmsd_stream_publish(ldms_darsh, env_ldms_stream, LDMSD_STREAM_JSON, jb11, strlen(jb11) + 1);
+ if (rc)
+ printf("Error %d publishing data.\n", rc);
+
+ out_1:
+ return;
+ }
+
+.. note::
+
+ For more information about the various Darshan I/O traces and metrics collected, please visit `Darshan's Runtime Installation Page `_ and `Darshan LDMS Metrics Collected `_ pages.
+
+Once this function is called, it initializes a connection to the LDMS Streams daemon (attempting reconnection if the connection is not established), then formats the given arguments/variables into a JSON message and finally publishes it to the LDMS Streams daemon.
+
+There are various formats that can be used to publish the data (i.e., JSON, string, etc.), so please review the `Defining A Format`_ section for more information.
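+
+For instance, a minimal sketch of publishing a plain string message instead of JSON (assuming the connection handle and stream name from the examples above):
+
+.. code-block:: RST
+
+   const char *msg = "hello from the darshanConnector";
+   int rc = ldmsd_stream_publish(ldms_darsh, "darshanConnector",
+                                 LDMSD_STREAM_STRING, msg, strlen(msg) + 1);
+   if (rc)
+       printf("Error %d publishing string data.\n", rc);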
+
+Collect Event Data
+/////////////////////////
+
+To collect the application data in real time (using the example given in this section), the ``darshan_ldms_connector_send(arg1, arg2, arg3, ...)`` call will be placed in all sections of the code where we want to publish a message. From the Darshan-LDMS Integration code we would have:
+
+.. code-block:: RST
+
+ darshan_ldms_connector_send(rec_ref->file_rec->counters[MPIIO_COLL_OPENS] + rec_ref->file_rec->counters[MPIIO_INDEP_OPENS], "open", -1, -1, -1, -1, -1, __tm1, __tm2, __ts1, __ts2, rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], "MPIIO", "MET");
+
+This line of code is placed within multiple macros (`MPIIO_RECORD_OPEN/READ/WRITE `_) in Darshan's MPIIO module.
+
+* Doing this will call the function every time Darshan detects an I/O event from the application (i.e. read, write, open, close). Once called, the arguments are passed to the function, added to the JSON-formatted message and pushed to the LDMS daemon.
+
+.. note::
+
+   For more information about how to store the published data from an LDMS Streams daemon, please see the Stream CSV Store plugin man page on a system where the LDMS docs are installed: ``man Plugin_stream_csv_store``
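+
+A hedged sketch of an ldmsd configuration that stores the ``darshanConnector`` stream with this plugin (attribute names follow that man page; the path and container are illustrative):
+
+.. code-block:: RST
+
+   load name=stream_csv_store
+   config name=stream_csv_store path=/tmp/streams container=csv stream=darshanConnector buffer=1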
diff --git a/rtd/docs/source/ldms-tutorial.rst b/rtd/docs/source/ldms-tutorial.rst
new file mode 100644
index 000000000..ce4b26df7
--- /dev/null
+++ b/rtd/docs/source/ldms-tutorial.rst
@@ -0,0 +1,4 @@
+Additional LDMS Tutorial Material
+===================================
+* `Tutorial Videos `_
+* `Tutorial Slides `_
diff --git a/rtd/docs/source/ldms_man/Plugin_cray_dvs_sampler.rst b/rtd/docs/source/ldms_man/Plugin_cray_dvs_sampler.rst
new file mode 100644
index 000000000..7788bea20
--- /dev/null
+++ b/rtd/docs/source/ldms_man/Plugin_cray_dvs_sampler.rst
@@ -0,0 +1,108 @@
+=======================
+Plugin_cray_dvs_sampler
+=======================
+
+:Date: 05 Feb 2018
+
+.. contents::
+ :depth: 3
+..
+
+NAME
+========================
+
+Plugin_cray_dvs_sampler - man page for the LDMS cray_dvs_sampler plugin
+
+SYNOPSIS
+============================
+
+| Within ldmsd_controller or a configuration file:
+| config name=cray_dvs_sampler [ = ]
+
+DESCRIPTION
+===============================
+
+With LDMS (Lightweight Distributed Metric Service), plugins for the
+ldmsd (ldms daemon) are configured via ldmsd_controller or a
+configuration file.
+
+The cray_dvs_sampler plugin provides statistics from
+/proc/fs/dvs/mount/[mount-id]/stats. A separate metric set is produced
+for each mount point. Metric set names are of the form \`XXX'.
+
+See section \`DATA AND THE CONFIGURATION FILE' for information on the
+variables and configuration file.
+
+This sampler is for Cray systems only.
+
+CONFIGURATION ATTRIBUTE SYNTAX
+==================================================
+
+The cray_dvs_sampler plugin uses the sampler_base base class. This man
+page covers only the configuration attributes, or those with default
+values, specific to this plugin; see ldms_sampler_base.man for the
+attributes of the base class.
+
+**config**
+ | name= [schema= conffile=]
+ | configuration line
+
+ name=
+ |
+ | This MUST be cray_dvs_sampler
+
+ schema=
+ |
+ | Optional schema name. It is intended that the same sampler on
+ different nodes with different metrics have a different schema.
+ If not specified, will default to \`cray_dvs_sampler\`.
+
+ conffile=
+ |
+ | Optional path to the configuration file
+
+DATA AND THE CONFIGURATION FILE
+===================================================
+
+| The data source is /proc/fs/dvs/mount/[mount-id]/stats. This file
+ consists of a number of lines of the format
+| variablename: v1 v2 ... vN
+
+The number of values varies between 1 and 6. Each line will then produce
+between 1 and 6 metrics with names of the form variablename appended by
+an additional string associated with the interpretation of that value
+(e.g., min, err).
+
+By default, this sampler will collect all the variables for all mount
+points. The number of metrics can be downselected by using a
+configuration file (see conffile argument). The format of this file is
+one variablename per line, comments start with '#' and blank lines are
+skipped. Note that the variablename from the dataline is what is
+specified in the configuration file, not the metricnames associated with
+that variablename in the data source file. As a result, all metrics
+associated with a given line in the dvs stats source are included or
+excluded together.
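+
+For example, a conffile that downselects to two variables might look like
+the following (the variable names are illustrative):
+
+::
+
+   # dvs stats variables to collect
+   read
+   write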
+
+NOTES
+=========================
+
+- In the config, the sampler is called cray_dvs_sampler. Also the
+ library is called libcray_dvs_sampler. However, the source file is
+ dvs_sampler.c
+
+- This sampler is for Cray systems only.
+
+BUGS
+========================
+
+None known.
+
+EXAMPLES
+============================
+
+TBD
+
+SEE ALSO
+============================
+
+ldmsd(8), ldms_quickstart(7), ldmsd_controller(8), ldms_sampler_base(7)
diff --git a/rtd/docs/source/ldms_man/Plugin_jobid.rst b/rtd/docs/source/ldms_man/Plugin_jobid.rst
new file mode 100644
index 000000000..73cb07581
--- /dev/null
+++ b/rtd/docs/source/ldms_man/Plugin_jobid.rst
@@ -0,0 +1,125 @@
+============
+Plugin_jobid
+============
+
+:Date: 03 Dec 2016
+
+.. contents::
+ :depth: 3
+..
+
+NAME
+=============
+
+Plugin_jobid - man page for the LDMS jobid plugin
+
+SYNOPSIS
+=================
+
+| Within ldmsd_controller or in a configuration file
+| config name=jobid [ = ]
+
+DESCRIPTION
+====================
+
+With LDMS (Lightweight Distributed Metric Service), plugins for the
+ldmsd (ldms daemon) are configured via ldmsd_controller or a
+configuration file. The jobid plugin provides jobid info from
+/var/run/ldms.jobinfo or similar files replaced periodically by resource
+managers. When files are missing, the value 0 or equivalent is reported.
+
+CONFIGURATION ATTRIBUTE SYNTAX
+=======================================
+
+**config**
+ | name= producer= instance=
+ [component_id= schema=] [with_jobid=]
+ file=
+ | configuration line
+
+ name=
+ |
+ | This MUST be jobid.
+
+ producer=
+ |
+ | The producer name value.
+
+ instance=
+ |
+ | The name of the metric set.
+
+ schema=
+ |
+ | Optional schema name. It is intended that the same sampler on
+ different nodes with different metrics have a different schema.
+   If not specified, will default to \`jobid\`.
+
+ component_id=
+ |
+ | Optional component identifier. Defaults to zero.
+
+ with_jobid=
+ |
+   | Option to look up the job_id, or report 0 if not. The job_id column
+   will always appear, but may be populated with zero.
+
+BUGS
+=============
+
+No known implementation bugs. Design features you may not like: Relies
+on site-specific resource manager configuration to produce the file
+read. Does not query local or remote manager daemons. May be slow to
+sample and generate undesirable filesystem events if filepath is on a
+networked filesystem instead of a node-local RAM partition as is usual
+in clusters.
+
+NOTES
+==============
+
+The colname option from LDMS v2 slurmjobid plugin is no longer
+supported. The sampler offset for the jobid plugin should be slightly
+less than all other plugins to ensure consistency in the job information
+reported for a given time interval across all other plugins. The time
+interval for the jobid plugin need only be approximately the clock
+granularity of the resource manager.
+
+Other samplers use the jobid plugin as the jobid data source. If the
+jobid sampler is not loaded, these samplers will report 0 jobid values.
+
+EXAMPLES
+=================
+
+::
+
+ Within ldmsd_controller or in a configuration file
+ load name=jobid
+ config name=jobid component_id=1 producer=vm1_1 instance=vm1_1/jobid
+ start name=jobid interval=1000000 offset=-100000
+
+
+ Within ldmsd_controller or in a configuration file
+ load name=jobid
+ config name=jobid component_id=1 producer=vm1_1 instance=vm1_1/jobid file=/var/run/rman/node/jobinfo
+ start name=jobid interval=1000000 offset=-100000
+
+Slurm 2.x installations can populate /var/run/ldms.jobinfo by adding the
+following lines to slurm.epilog and slurm.prolog, respectively.
+
+::
+
+
+ echo "JOBID=0" > /var/run/ldms.jobinfo
+
+ and
+
+ echo JOBID=$SLURM_JOBID > /var/run/ldms.jobinfo
+ echo UID=$SLURM_UID >> /var/run/ldms.jobinfo
+ echo USER=$SLURM_JOB_USER >> /var/run/ldms.jobinfo
+
+These slurm files might be found in /etc/nodestate/bin/.
+
+SEE ALSO
+=================
+
+ldms(7), ldmsd(8), ldmsd_controller(8)
diff --git a/rtd/docs/source/ldms_man/Plugin_lustre2_client.rst b/rtd/docs/source/ldms_man/Plugin_lustre2_client.rst
new file mode 100644
index 000000000..36707f4d0
--- /dev/null
+++ b/rtd/docs/source/ldms_man/Plugin_lustre2_client.rst
@@ -0,0 +1,100 @@
+=====================
+Plugin_lustre2_client
+=====================
+
+:Date: 26 Oct 2017
+
+.. contents::
+ :depth: 3
+..
+
+NAME
+======================
+
+Plugin_lustre2_client - man page for the LDMS lustre2_client plugin
+
+SYNOPSIS
+==========================
+
+| Within ldmsd_controller or a configuration file:
+| ldmsctl> config name=lustre2_client [ = ]
+
+DESCRIPTION
+=============================
+
+The lustre2_client plugin provides Lustre metric information.
+
+CONFIGURATION ATTRIBUTE SYNTAX
+================================================
+
+This plugin uses the sampler_base base class. This man page covers only
+the configuration attributes, or those with default values, specific to
+this plugin; see **ldms_sampler_base**\ (7) for the attributes of
+the base class.
+
+**config** **name**\ *=* **osc**\ *=* **mdc**\ *=* **llite**\ *=* **osc_path**\ *=* **mdc_path**\ *=* **llite_path**\ *=*
+
+Descriptions:
+
+ **name**\ *=*
+ This MUST be lustre2_client.
+
+ ****
+ Please see **ldms_sampler_base**\ (7) for sampler_base options.
+
+ **osc**\ *=*
+ CSV list of OSC's.
+
+ **mdc**\ *=*
+ CSV list of MDC's.
+
+ **llite**\ *=*
+ CSV list of LLITE's.
+
+ **osc_path**\ *=*
+ A user custom path to osc.
+
+ **mdc_path**\ *=*
+   A user custom path to mdc.
+
+ **llite_path**\ *=*
+ A user custom path to llite.
+
+NOTES
+=======================
+
+For oscs, mdcs and llites: if not specified, NONE of the oscs/mdcs/llites
+will be added. If {oscs,mdcs,llites} is set to \*, all of the available
+{oscs,mdcs,llites} at the time will be added.
+
+The names that make up the list of oscs, mdcs and llites do not have to
+include the uid part. For example, 'lustre-ffff8803245d4000' is the
+actual file in /proc/fs/lustre/llite/, but you can just say
+llites=lustre to include this component into the set.
+
+osc_path, mdc_path and llite_path are optional full path names of the
+stats files if they are not in the default location. The default
+locations are /sys/kernel/debug/lustre/{osc, mdc, llite} and
+/proc/fs/lustre/{osc, mdc, llite}, depending on the Lustre version. Be
+aware that /sys/kernel/debug is only readable by privileged users.
+
+BUGS
+======================
+
+None known.
+
+EXAMPLES
+==========================
+
+::
+
+   ldmsctl> load name=lustre2_client
+   ldmsctl> config name=lustre2_client producer=compute1 component_id=1 instance=compute1/lustre2_client llites=*
+   ldmsctl> start name=lustre2_client interval=1000000
+   ldmsctl> quit
+
+SEE ALSO
+==========================
+
+**ldms_sampler_base**\ (7), **ldmsd**\ (8), **ldms_quickstart**\ (7),
+**ldmsd_controller**\ (8)
diff --git a/rtd/docs/source/ldms_man/Plugin_papi.rst b/rtd/docs/source/ldms_man/Plugin_papi.rst
new file mode 100644
index 000000000..d4e5cfa54
--- /dev/null
+++ b/rtd/docs/source/ldms_man/Plugin_papi.rst
@@ -0,0 +1,112 @@
+===========
+Plugin_papi
+===========
+
+:Date: 09 May 2016
+
+.. contents::
+ :depth: 3
+..
+
+NAME
+============
+
+Plugin_papi - man page for the LDMS papi sampler plugin.
+
+SYNOPSIS
+================
+
+| Within ldmsctl
+| ldmsctl> config name=spapi [ = ]
+
+DESCRIPTION
+===================
+
+With LDMS (Lightweight Distributed Metric Service), plugins for the
+ldmsd (ldms daemon) are configured via ldmsctl. The papi sampler plugin
+runs on the nodes and provides data about the occurrence of
+micro-architectural events, using the PAPI library to access hardware
+performance counters.
+
+ENVIRONMENT
+===================
+
+You will need to build LDMS with --enable-papi. The PAPI library must be
+available through the plugin library path.
+
+LDMSCTL CONFIGURATION ATTRIBUTE SYNTAX
+==============================================
+
+**config**
+ name= events=
+ pid= producer= instance=
+ [schema=] [component_id= with_jobid=