From a75b1d9a7930f0666d7416d6a4294e28f3abf6b8 Mon Sep 17 00:00:00 2001
From: Morgan Rockett
Date: Tue, 9 Jul 2024 18:36:58 -0400
Subject: [PATCH] ci: add pylint to the GitHub workflow to enforce code
 quality; fixes #264

This commit was made with the assistance of GitHub Copilot.

Signed-off-by: Morgan Rockett
---
 .github/workflows/ci.yml                      |  37 +-
 .pylintrc                                     | 327 ++++++++++++++++++
 README.md                                     |  76 +++-
 scripts/plot-samples.py                       | 121 +++++++
 scripts/plot.py                               |  96 -----
 scripts/pylint.sh                             |  50 +++
 .../bench/parsec/evm/contracts/gen_header.py  | 243 +++++++------
 7 files changed, 736 insertions(+), 214 deletions(-)
 create mode 100644 .pylintrc
 create mode 100644 scripts/plot-samples.py
 delete mode 100644 scripts/plot.py
 create mode 100755 scripts/pylint.sh

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cf85442c4..7ec1392a1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,7 +18,7 @@ env:
 jobs:
   build-release:
     name: Build Release Candidate
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     env:
       BUILD_RELEASE: 1
     steps:
@@ -30,7 +30,7 @@ jobs:
       - name: Setup Local Dependencies
         run: ./scripts/setup-dependencies.sh
       - name: Build
-        run: scripts/build.sh
+        run: ./scripts/build.sh
   lint:
     name: Lint
     runs-on: ubuntu-20.04
@@ -43,12 +43,32 @@ jobs:
       - name: Setup Local Dependencies
         run: ./scripts/setup-dependencies.sh
       - name: Build
-        run: scripts/build.sh
+        run: ./scripts/build.sh
       - name: Lint
-        run: scripts/lint.sh
+        run: ./scripts/lint.sh
+  pylint:
+    name: Pylint
+    runs-on: ubuntu-22.04
+    continue-on-error: true
+    timeout-minutes: 10
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: recursive
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Setup Build Env
+        run: sudo ./scripts/install-build-tools.sh
+      - name: Lint with Pylint
+        run: ./scripts/pylint.sh
   unit-and-integration-test:
     name: Unit and Integration Tests
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     timeout-minutes: 30
     steps:
       - uses: actions/checkout@v2
@@ -59,9 +79,9 @@ jobs:
       - name: Setup Local Dependencies
         run: ./scripts/setup-dependencies.sh
      - name: Build
-        run: scripts/build.sh
+        run: ./scripts/build.sh
      - name: Run Unit Tests
-        run: scripts/test.sh
+        run: ./scripts/test.sh
      - name: Shorten SHA
        id: vars
        run: echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
@@ -76,7 +96,7 @@ jobs:
           retention-days: 7
   doxygen:
     name: doxygen
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v2
         with:
@@ -94,3 +114,4 @@ jobs:
           name: OpenCBDC Transaction Processor docs for ${{ steps.vars.outputs.sha_short }}
           path: ./doxygen_generated/html/*
           retention-days: 7
+
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 000000000..1daa6a69d
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,327 @@
+# Documentation:
+# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html
+
+[MAIN]
+
+# Specify a score threshold under which the program will exit with error.
+fail-under=10
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=CVS
+
+# Files or directories matching the regular expression patterns are skipped.
+# The regex matches against base names, not paths. The default value ignores
+# Emacs file locks
+ignore-patterns=^\.#
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use, and will cap the count on Windows to
+# avoid hangs. Set to 0 for parallel processing (the default is 1).
+jobs=0
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.10
+
+# Discover python modules and packages in the file system subtree.
+recursive=yes
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+
+[BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Naming style matching correct class constant names.
+class-const-naming-style=snake_case
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Naming style matching correct constant names.
+const-naming-style=snake_case
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=no
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=^_
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken in consideration only for invalid-name.
+property-classes=abc.abstractproperty
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+
+[CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp,
+                      asyncSetUp,
+                      __post_init__
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method.
+max-args=10
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branches for function / method body.
+max-branches=24
+
+# Maximum number of locals for function / method body.
+max-locals=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of return / yield for function / method body.
+max-returns=10
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
+[FORMAT]
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=79
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=yes
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[MESSAGES CONTROL]
+
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=raw-checker-failed,
+        bad-inline-option,
+        locally-disabled,
+        file-ignored,
+        suppressed-message,
+        useless-suppression,
+        deprecated-pragma,
+        use-symbolic-message-instead,
+        use-implicit-booleaness-not-comparison-to-string,
+        use-implicit-booleaness-not-comparison-to-zero
+
+
+[METHOD_ARGS]
+
+# List of qualified names (i.e., library.method) which require a timeout
+# parameter e.g. 'requests.api.get,requests.api.post'
+timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=7
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+# Let 'consider-using-join' be raised when the separator to join on would be
+# non-empty (resulting in expected fixes of the type: ``"- " + " -
+# ".join(items)``)
+suggest-join-with-non-empty-separator=yes
+
+
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'fatal', 'error', 'warning', 'refactor',
+# 'convention', and 'info' which contain the number of messages in each
+# category, as well as 'statement' which is the total number of statements
+# analyzed. This score is used by the global evaluation report (RP0004).
+evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=yes
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of symbolic message names to ignore for Mixin members.
+ignored-checks-for-mixins=no-member,
+                          not-async-context-manager,
+                          not-context-manager,
+                          attribute-defined-outside-init
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+# Regex pattern to define which classes are considered mixins.
+mixin-class-rgx=.*[Mm]ixin
+
+
+[VARIABLES]
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=yes
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
+
diff --git a/README.md b/README.md
index c066a4df8..c9215a31d 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,11 @@ The design decisions we made to achieve these goals will help inform policy make
 **NOTE:** In cases where there are significant changes to the repository that might need manual intervention down-stream (or other important updates), we will [make a NEWS post](NEWS.md).
 
 # Architecture
+
 We have explored several architectures under two broad categories as follows:
+
 ## UHS-Based Transaction Processor
+
 We explored two system architectures for transaction settlement based on an [unspent transaction output (UTXO)](https://en.wikipedia.org/wiki/Unspent_transaction_output) data model and transaction format.
 Both architectures implement the same schema representing an [unspent hash set (UHS)](https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2018-May/015967.html) abstraction.
 One architecture provides [linearizability](https://en.wikipedia.org/wiki/linearizability) of transactions, whereas the other only provides [serializability](https://en.wikipedia.org/wiki/Serializability).
@@ -27,6 +30,7 @@ By relaxing the ordering constraint, the peak transaction throughput supported b
 Both architectures handle multiple geo-distributed datacenter outages with a [recovery time objective (RTO)](https://en.wikipedia.org/wiki/Disaster_recovery#Recovery_Time_Objective) of under ten seconds and a [recovery point objective (RPO)](https://en.wikipedia.org/wiki/Disaster_recovery#Recovery_Point_Objective) of zero.
 
 There are two UHS-based architectures as follows:
+
 1. "Atomizer" architecture
    - Materializes a total ordering of all transactions settled by the system in a linear sequence of batches.
    - Requires vertical scaling as peak transaction throughput is limited by the performance of a single system component.
@@ -41,9 +45,11 @@ There are two UHS-based architectures as follows:
 Read the [2PC & Atomizer architecture guide](docs/uhs-architectures.md) for a detailed description of the system components and implementation of each architecture.
 
 ## Parallel Architecture for Scalably Executing smart Contracts ("PArSEC")
+
 We built a system with a generic virtual machine layer that is capable of performing parallel executions of smart contracts.
 
 The architecture is composed of two layers:
+
 1. A distributed key-value data store with [ACID](https://en.wikipedia.org/wiki/ACID) database properties
    - This back-end data store is not constrained to any type of data and is agnostic to the execution layer.
 1. A generic virtual machine layer that executes programs (i.e. smart contracts) and uses the distributed key-value data store to record state
@@ -54,6 +60,7 @@ The architecture is composed of two layers:
    - Unmodified smart contracts from the Ethereum ecosystem can be deployed directly onto our EVM implementation.
 
 Read the [PArSEC Architecture Guide](docs/parsec_architecture.md) for more details.
+
 # Contributing
 
 You can [sign up](https://dci.mit.edu/opencbdc-interest) to receive updates from technical working groups and to learn more about our work.
@@ -67,7 +74,9 @@ If you want to dive straight in, take a look at our issue tracker's list of [goo
 1. [Install Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git)
 1. Clone the repository (including submodules)
-   - `git clone --recurse-submodules https://github.com/mit-dci/opencbdc-tx`
+   ```console
+   $ git clone --recurse-submodules https://github.com/mit-dci/opencbdc-tx
+   ```
 
 # Setup the build environment
 
@@ -80,6 +89,11 @@ If you just want to run the system, see "Run the Code" below.
    ```console
    # ./scripts/install-build-tools.sh
    ```
+   Note: Running Homebrew as root on macOS is not supported, so run the script without sudo and enter your password when prompted.
+   ```console
+   $ ./scripts/install-build-tools.sh
+   ```
+
 1. Setup project dependencies
 
    This script builds and installs a local copy of several build-dependencies which are not widely packaged. Because it installs to a local, configurable prefix (defaulting to `./prefix`), it does not need root permissions to run.
@@ -93,20 +107,26 @@ If you just want to run the system, see "Run the Code" below.
    ```
 
 ## macOS
+
 Note that if you have not already installed the xcode cli tools you will need to:
 
 ```console
 # xcode-select --install
 ```
+
 # Run the Code
 
 The API Reference is now housed in [an external repository](https://github.com/mit-dci/opencbdc-tx-pages/). See the [live deployment](https://mit-dci.github.io/opencbdc-tx-pages/) to browse.
 
 ## UHS-based Architectures (2PC & Atomizer)
+
 See the [2PC & Atomizer User Guide](docs/2pc_atomizer_user_guide.md)
+
 ## PArSEC Architecture
+
 See the [PArSEC User Guide](docs/parsec_user_guide.md)
+
 # Testing
 
 Running Unit & Integration Tests
@@ -123,6 +143,7 @@ Running Unit & Integration Tests
 ## E2E Testing with Kubernetes
 
 ### Requirements
+
 - Go (go test library used to run tests)
 - Minikube
 - Helm
@@ -130,6 +151,53 @@ Running Unit & Integration Tests
 
 ### Running tests
 
-1. `./scripts/build-docker.sh`
-1. `./scripts/test-e2e-minikube.sh`
-1. Review results and logs at `testruns/<timestamp>/`
+```console
+$ ./scripts/build-docker.sh
+```
+
+```console
+$ ./scripts/test-e2e-minikube.sh
+```
+
+Review results and logs at `testruns/<timestamp>/`
+
+## Linting
+
+### General
+
+The general lint script checks for newlines at the end of all tracked git
+files except images, then runs clang-format and clang-tidy on `.cpp` files
+in the following directories: `src`, `tests`, `cmake-tests`, and `tools`.
+
+```console
+$ ./scripts/lint.sh
+```
+
+### Python
+
+Lint all Python files according to the ruleset defined in `.pylintrc`.
+An optional code-quality value between 5.0 and 10.0 (inclusive) can be
+passed as the threshold of failure.
+
+```console
+$ ./scripts/pylint.sh 8.0
+```
+
+## Virtual Environment for Python
+
+`./scripts/install-build-tools.sh` creates a virtual environment.
+Once it has run, follow these steps to run Python code.
+
+1. Activate the virtual environment, which has the required Python version and packages installed.
+   ```console
+   $ source ./scripts/activate-venv.sh
+   ```
+
+2. Run Python code
+   ```console
+   (.py_venv) $ python ./scripts/<script_name>.py
+   ```
+
+3. Exit the virtual environment
+   ```console
+   (.py_venv) $ deactivate
+   ```
diff --git a/scripts/plot-samples.py b/scripts/plot-samples.py
new file mode 100644
index 000000000..d070d1ded
--- /dev/null
+++ b/scripts/plot-samples.py
@@ -0,0 +1,121 @@
+import sys
+import os.path
+import glob
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def parse_args():
+    '''
+    Allow the user to specify the directory containing the performance data.
+    Example usage: python plot-samples.py -d <tests_dir>
+    Generates plots for all tx_samples in the specified directory.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Plot performance data from tx_samples')
+    # help message for the directory argument
+    parser.add_argument('-d', '--dir', dest='tests_dir',
+                        action='store', default='.', type=str,
+                        help='Directory containing performance data')
+    return parser.parse_args()
+
+
+def plot_latency(fname, fig=None, ax1=None, ax2=None):
+    '''
+    Plot the throughput and latency data from a file in the
+    tx_samples directory - called by scripts/native-system-benchmark.sh
+    (str, Figure, Axes, Axes) -> None
+    '''
+    x, y, th_moving_avg, rates, tx_vals = [], [], [], [], []
+    fresh = False  # is this the plot of all data, or just a single plot
+
+    if not fig:  # create new axes if necessary
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
+        fig.suptitle(fname + " performance data")
+        fresh = True
+
+    local_file = fname.split('/')[-1].split('_')
+    filename = f"loadgen_{local_file[-1].split('.')[0]}"
+
+    # get data from file
+    data = read_in_data(fname)
+    # first sample in file is reference time 0
+    time_start = int(data[0].split()[0])
+
+    # Format data for plotting
+    t_prev = 0
+    for idx, line in enumerate(data):
+        fields = line.split()
+        if len(fields) < 2:
+            break
+        x.append((int(fields[0]) - time_start)/10**9)
+        y.append(int(fields[1])/10**9)
+        if x[idx] - x[t_prev] > 1:
+            tx_vals.append(x[idx])
+            rates.append(idx - t_prev)
+            th_moving_avg.append(np.mean(rates))
+            t_prev = idx
+
+    # get line of best fit
+    f1, f2 = np.polyfit(x, y, 1)
+    f1 = round(f1, 3)
+    f2 = round(f2, 3)
+
+    # plot latency data
+    ax2.set_title("Tx delay (s) vs time since start (s)")
+    ax2.plot(x, y, label=f'{filename}: data')
+    sign = '+ ' if f2 > 0 else ''
+    label = f"{filename}: Line of best fit: {f1}(sec) {sign}{f2}"
+    ax2.plot(np.array(x), f1*np.array(x)+f2, label=label)
+    ax2.legend(loc="upper right")
+    ax2.set(xlabel="Time (s)", ylabel="Latency (s)")
+
+    # plot throughput data
+    ax1.set_title("Throughput (TX/s) vs. time (s)")
+    ax1.plot(tx_vals, rates, label="Throughput")
+    ax1.plot(tx_vals, th_moving_avg, label="(Moving) Average Throughput")
+    ax1.legend(loc="upper right")
+    ax1.set(xlabel="Time (s)", ylabel="Throughput (TX/s)")
+    if fresh:
+        fig.savefig(f"{filename}_performance.png")
+
+
+def read_in_data(fname) -> list:
+    '''
+    get data from file and return as a list of lines
+    '''
+    if not os.path.isfile(fname):
+        print(f'File {fname} does not exist')
+        sys.exit(1)
+
+    lines = []
+    try:
+        with open(fname, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+    except IOError as e:
+        print(f'Error reading from file {fname}\n{e}\n')
+        sys.exit(1)
+
+    return lines
+
+
+if __name__ == '__main__':
+
+    args = parse_args()
+    tests_dir = args.tests_dir
+
+    # Get all tx sample files in the test directory
+    f_list = glob.glob(f'{tests_dir}/tx_samples_*.txt')
+    if not f_list:
+        print(f'No tx_samples files found in {tests_dir = }')
+        sys.exit(1)
+
+    global_fig, global_axs = plt.subplots(1, 2, figsize=(12, 5))
+
+    for file in f_list:
+        plot_latency(file)
+        plot_latency(file, global_fig, global_axs[0], global_axs[1])
+
+    global_fig.savefig(f'{tests_dir}/aggregate_performance.png')
diff --git a/scripts/plot.py b/scripts/plot.py
deleted file mode 100644
index 855dcdef7..000000000
--- a/scripts/plot.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import os.path
-import glob
-import sys
-
-# Usage: python plot.py <path>
-# generate plots from tx_samples
-# list, Axes -> void
-def plot_latency(fname, fig=None, ax1=None, ax2=None):
-    x = []
-    y = []
-    th_moving_avg = []
-    rates = []
-    tx_vals = []
-    fresh = False # is this the plot of all data, or just a single plot
-
-    if (fig == None): # create new axes if necessary
-        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
-        fig.suptitle(fname+" performance data")
-        fresh = True
-    local_file = fname.split('/')
-    local_file = local_file[-1].split('_')
-    id = local_file[-1].split('.')[0]
-    nm = 'loadgen_' + id
-
-    # get data from file
-    data = read_in_data(fname)
-    # first sample in file is reference time 0
-    time_start = int(data[0].split()[0])
-
-    queue = []
-    queue_max = 15
-
-    t_prev = 0
-
-    # Format data for plotting
-    for i in range(len(data)):
-        d = data[i].split()
-        if (len(d) < 2):
-            break
-        a = data[i].split()
-        x.append((int(a[0]) - time_start)/10**9)
-        y.append(int(a[1])/10**9)
-        if (x[i] - x[t_prev] > 1):
-            tx_vals.append(x[i])
-            rates.append(i-t_prev)
-            th_moving_avg.append(np.mean(rates))
-            t_prev = i
-
-    # get line of best fit
-    f1, f2 = np.polyfit(x, y, 1)
-    f1 = round(f1, 3)
-    f2 = round(f2, 3)
-
-    # plot latency data
-    ax2.set_title("Tx delay (s) vs time since start (s)")
-    string = nm + ': data'
-    ax2.plot(x, y, label=string)
-    sign = '+ ' if f2 > 0 else ''
-    string = "Line of best fit: " + str(f1) + "(sec) " + sign + str(f2)
-    string = nm + ': ' + string
-    ax2.plot(np.array(x), f1*np.array(x)+f2, label=string)
-    ax2.legend(loc="upper right")
-    ax2.set(xlabel="Time (s)", ylabel="Latency (s)")
-
-    # plot throughput data
-    ax1.set_title("Throughput (TX/s) vs. time (s)")
-    ax1.plot(tx_vals, rates, label="Throughput")
-    ax1.plot(tx_vals, th_moving_avg, label="(Moving) Average Throughput")
-    ax1.legend(loc="upper right")
-    ax1.set(xlabel="Time (s)", ylabel="Throughput (TX/s)")
-    if (fresh):
-        fig.savefig(nm + "_performance.png")
-
-# get data from file
-def read_in_data(fname):
-    if (not os.path.isfile(fname)):
-        raise Exception("Cannot find file " + fname)
-    fin = open(fname, "r")
-    data = fin.readlines()
-    fin.close()
-    return data
-
-
-if __name__ == '__main__':
-    path = "."
-    # Get path to test data
-    if (len(sys.argv) > 1):
-        path = str(sys.argv[1])
-    f_list = glob.glob(path + '/tx_samples_*.txt')
-    global_fig, global_axs = plt.subplots(1, 2, figsize=(12, 5))
-    for fin in f_list:
-        plot_latency(fin)
-        plot_latency(fin, global_fig, global_axs[0], global_axs[1])
-    global_fig.savefig(path + "/aggregate_performance.png")
diff --git a/scripts/pylint.sh b/scripts/pylint.sh
new file mode 100755
index 000000000..9a31cc8d1
--- /dev/null
+++ b/scripts/pylint.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+ROOT="$(cd "$(dirname "$0")"/.. && pwd)"
+PREFIX="${ROOT}"/prefix
+MIN_CODE_QUALITY=8.0
+
+get_code_score() {
+    if [ -n "$1" ]; then
+        # set minimum quality to user input (int/float) if provided
+        # and (5.0 <= input <= 10.0)
+        if [[ $1 =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+            if (( $(echo "$1 >= 5.0" | bc -l) )) && (( $(echo "$1 <= 10.0" | bc -l) )); then
+                MIN_CODE_QUALITY=$1
+            else
+                # In the future, we want code quality to be at minimum 8.0/10.0
+                echo "Code quality score must be between 5.0 and 10.0, inclusive."
+                echo "Recommended code quality score is >= 8.0."
+                exit 1
+            fi
+        else
+            echo "Code quality score must be an integer or floating point number."
+            exit 1
+        fi
+    fi
+    echo "Linting Python code with minimum quality of $MIN_CODE_QUALITY/10.0..."
+}
+
+check_pylint() {
+    if ! command -v pylint &>/dev/null; then
+        echo "pylint is not installed."
+        echo "Run 'sudo ./scripts/install-build-tools.sh' to install pylint."
+        exit 1
+    fi
+}
+
+get_code_score "$1"
+if source "${ROOT}/scripts/activate-venv.sh"; then
+    echo "Virtual environment activated."
+else
+    echo "Failed to activate virtual environment."
+    exit 1
+fi
+
+check_pylint
+if ! pylint scripts src tests tools --rcfile=.pylintrc \
+    --fail-under="$MIN_CODE_QUALITY" $(git ls-files '*.py'); then
+    echo "Linting failed, please fix the issues and rerun."
+    exit 1
+else
+    echo "Linting passed."
+fi
diff --git a/tools/bench/parsec/evm/contracts/gen_header.py b/tools/bench/parsec/evm/contracts/gen_header.py
index e77c9a9fd..d8b8a8712 100644
--- a/tools/bench/parsec/evm/contracts/gen_header.py
+++ b/tools/bench/parsec/evm/contracts/gen_header.py
@@ -2,15 +2,16 @@
 # Federal Reserve Bank of Boston
 # Distributed under the MIT software license, see the accompanying
 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
-import json
 import os
+import json
 import re
+import sys
 
-# Conversion method from camelCase to snake_case
-snake_convert_pattern = re.compile(r'(?<!^)(?=[A-Z])')
-def to_snake(name):
-    return snake_convert_pattern.sub('_', name).lower()
-
-# This creates a method data_<contract>_<method> for generating the input data necessary to
-# call the given method on the contract
-contracts = {'artifacts/contracts/ERC20.sol/Token.json':'erc20'}
-
-# Load the JSON outputs of the hardhat compilation for each contract we want
-# to include in the header file
-loaded_contracts = {}
-for k, v in contracts.items():
-    with open(k) as f:
-        loaded_contracts[v] = json.load(f)
-
-# Make sure our output folder exists
-if not os.path.exists('cpp_header'):
-    os.makedirs('cpp_header')
-
-with open('cpp_header/contracts.hpp', 'w+') as f:
-    # Write the standard copyright header in the header file
-    f.write('// Copyright (c) 2022 MIT Digital Currency Initiative,\n')
-    f.write('// Federal Reserve Bank of Boston\n')
-    f.write('// Distributed under the MIT software license, see the accompanying\n')
-    f.write('// file COPYING or http://www.opensource.org/licenses/mit-license.php.\n\n')
-    f.write('#ifndef OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n')
-    f.write('#define OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
-    f.write('#include "util/common/buffer.hpp"\n\n')
-    f.write('#include "parsec/agent/runners/evm/hash.hpp"\n\n')
-
-    # The first 4 bytes of the input data sent to a contract are the method
-    # selector in ETH. It is the first 4 bytes of keccak256(<method signature>)
-    f.write('namespace cbdc::parsec::evm_contracts {\n')
-
-
-    # The first 4 bytes of the input data sent to a contract are the method
-    # selector in ETH. It is the first 4 bytes of keccak256(<method signature>)
-    f.write('    static constexpr size_t selector_size = 4;\n')
-
-    # Parameters in a method call are always 32 bytes
-    f.write('    static constexpr size_t param_size = 32;\n\n')
-
-    # Because parameters are 32 bytes, addresses need to be copied at a 12 bytes
-    # offset
-    f.write('    static constexpr size_t address_param_offset = 12; // in ABIs addresses are also 32 bytes\n')
-
-    # Generate methods for all contracts
-    for k, v in loaded_contracts.items():
-        # The data needed to deploy the contract, which is essentially the
-        # byte code parameter in the compiled asset JSON
-        f.write('    auto data_{}_deploy() -> cbdc::buffer {{\n'.format(k))
-        f.write('        auto buf = cbdc::buffer::from_hex("{}");\n'.format(v['bytecode'][2:]))
-        f.write('        return buf.value();\n')
-        f.write('    }\n\n')
-
-        # Loop over the functions in the ABI
-        for abi in v['abi']:
-            # Only make methods for functions, ignore events (for now)
-            if abi['type'] == 'function':
-                # Write the method name data_<contract>_<method>
-                f.write('auto data_{}_{}('.format(k, to_snake(abi['name'])))
-
-                # Write all parameters as function arguments
-                inp_idx = 0
-                for inp in abi['inputs']:
-                    tp = 'bytes32'
-                    if inp['type'] == 'uint256':
-                        tp = 'uint256be'
-                    if inp['type'] == 'address':
-                        tp = 'address'
-                    if inp_idx > 0:
-                        f.write(', ')
-                    f.write('evmc::{} {}'.format(tp, to_snake(inp['name'])))
-                    inp_idx = inp_idx + 1
-
-                # Write the return method and creation of the empty buffer
-                f.write(') -> cbdc::buffer {\n')
-                f.write('        auto buf = cbdc::buffer();\n')
-
-                # Write the method selector calculation
-                f.write('        const auto selector_{name} = std::string("{name_raw}('.format_map(dict({'name':to_snake(abi['name']),'name_raw':abi['name']})))
-                inp_idx = 0
-                for inp in abi['inputs']:
-                    if inp_idx > 0:
-                        f.write(',')
-                    f.write(inp['type'])
-                    inp_idx = inp_idx + 1
-                f.write(')");\n')
-
-                # Write calculation of the selector hash and appending it to the buffer
-                f.write('        auto selector_hash = cbdc::keccak_data(selector_{name}.data(), selector_{name}.size());\n'.format_map(dict({'name':to_snake(abi['name'])})))
-                f.write('        buf.append(selector_hash.data(), selector_size);\n')
-
-                # Write code that appends the parameters to the buffer (if any)
-                if len(abi['inputs']) > 0:
-                    for i, inp in enumerate(abi['inputs']):
+contracts_dict = {'artifacts/contracts/ERC20.sol/Token.json':'erc20'}
+
+# helper functions
+def create_loaded_contracts(contracts: dict) -> dict:
+    '''
+    Load the JSON outputs of the hardhat compilation for
+    each contract we want to include in the header file
+    '''
+    loaded_contracts = {}
+    contracts_read = 0
+    for k, v in contracts.items():
+        try:
+            with open(k, 'r', encoding='utf-8') as file:
+                loaded_contracts[v] = json.load(file)
+                contracts_read += 1
+        except FileNotFoundError:
+            print(f'File {k} not found, skipping')
+            continue
+        except IOError:
+            print(f'Error reading {k}, skipping')
+            continue
+
+    if contracts_read == 0:
+        print('No contracts loaded, exiting')
+        sys.exit(1)
+
+    return loaded_contracts
+
+def camel_to_snake(name) -> str:
+    '''
+    Function to convert camelCase to snake_case
+    '''
+    snake_convert_pattern = re.compile(r'(?<!^)(?=[A-Z])')
+    return snake_convert_pattern.sub('_', name).lower()
+
+def write_header_file(loaded_contracts: dict) -> None:
+    '''
+    Function to write the header file
+    '''
+    # Make sure our output folder exists
+    output_folder = 'cpp_header'
+    output_file = f'{output_folder}/contracts.hpp'
+    os.makedirs(output_folder, exist_ok=True)
+
+    with open(output_file, 'w+', encoding='utf-8') as f:
+        # Write the standard copyright header in the header file
+        for line in copyright_license:
+            f.write(f'{line}\n')
+        f.write('\n')
+
+        f.write('#ifndef OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n')
+        f.write('#define OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
+        f.write('#include "util/common/buffer.hpp"\n\n')
+        f.write('#include "parsec/agent/runners/evm/hash.hpp"\n\n')
+
+        # Write the namespace for the contracts
+        f.write('namespace cbdc::parsec::evm_contracts {\n')
+
+        # The first 4 bytes of the input data sent to a contract are the
+        # method selector in ETH. It is the first 4 bytes of
+        # keccak256(<method signature>)
+        f.write('    static constexpr size_t selector_size = 4;\n')
+
+        # Parameters in a method call are always 32 bytes
+        f.write('    static constexpr size_t param_size = 32;\n\n')
+
+        # Since params are 32 bytes, addrs must be copied at a 12 bytes offset
+        f.write('    static constexpr size_t address_param_offset = 12; // in ABIs addresses are also 32 bytes\n')
+
+        # Generate methods for all contracts
+        for k, v in loaded_contracts.items():
+            # The data needed to deploy the contract, which is essentially the
+            # byte code parameter in the compiled asset JSON
+            f.write(f'    auto data_{k}_deploy() -> cbdc::buffer {{\n')
+            f.write(f'        auto buf = cbdc::buffer::from_hex("{v["bytecode"][2:]}");\n')
+            f.write('        return buf.value();\n')
+            f.write('    }\n\n')
+
+            # Loop over the functions in the ABI
+            for abi in v['abi']:
+                # Only make methods for functions, ignore events (for now)
+                if abi['type'] == 'function':
+                    # Write the method name data_<contract>_<method>
+                    f.write(f'auto data_{k}_{camel_to_snake(abi["name"])}(')
+                    # Write all parameters as function arguments
+                    for idx, inp in enumerate(abi['inputs']):
+                        tp = 'bytes32'
+                        if inp['type'] == 'uint256':
+                            tp = 'uint256be'
                         if inp['type'] == 'address':
-                        f.write('        buf.extend(address_param_offset);\n')
-                        f.write('        buf.append({name}.bytes, sizeof({name}.bytes));\n'.format_map(dict({'name':to_snake(inp['name'])})))
-
-                # Return the buffer we built
-                f.write('        return buf;\n')
-                f.write('    }\n\n')
-
-    f.write('}\n\n')
-    f.write('#endif // OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
-
+                            tp = 'address'
+                        if idx > 0:
+                            f.write(', ')
+                        f.write(f'evmc::{tp} {camel_to_snake(inp["name"])}')
+
+                    # Write the return method and creation of the empty buffer
+                    f.write(') -> cbdc::buffer {\n')
+                    f.write('        auto buf = cbdc::buffer();\n')
+
+                    # Write the method selector calculation
+                    f.write('        const auto selector_{name} = std::string("{name_raw}('.format_map(dict({'name':camel_to_snake(abi['name']),'name_raw':abi['name']})))
+                    for idx, inp in enumerate(abi['inputs']):
+                        if idx > 0:
+                            f.write(',')
+                        f.write(inp['type'])
+                    f.write(')");\n')
+
+                    # Write calculation of the selector hash and appending it to the buffer
+                    f.write('        auto selector_hash = cbdc::keccak_data(selector_{name}.data(), selector_{name}.size());\n'.format_map(dict({'name':camel_to_snake(abi['name'])})))
+                    f.write('        buf.append(selector_hash.data(), selector_size);\n')
+
+                    # Write code that appends the params to the buffer (if any)
+                    if len(abi['inputs']) > 0:
+                        for inp in abi['inputs']:
+                            if inp['type'] == 'address':
+                                f.write('        buf.extend(address_param_offset);\n')
+                            f.write('        buf.append({name}.bytes, sizeof({name}.bytes));\n'.format_map(dict({'name':camel_to_snake(inp['name'])})))
+                    f.write('        return buf;\n    }\n\n')
+
+        f.write('}\n\n')
+        f.write('#endif // OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
+
+
+if __name__ == '__main__':
+
+    # Load the contracts
+    loaded_contracts_dict = create_loaded_contracts(contracts_dict)
+
+    # Write the header file
+    write_header_file(loaded_contracts_dict)
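For reference, the selector encoding that gen_header.py bakes into the generated C++ header can be sanity-checked from Python. The following sketch is illustrative only and not part of this patch; it assumes the third-party pycryptodome package for Keccak-256 (Ethereum's keccak256 differs from hashlib's sha3_256), and it reuses the same camelCase-to-snake_case pattern the generator applies to ABI names.

```python
# Illustrative sketch, not part of this patch. Assumes the third-party
# pycryptodome package (pip install pycryptodome) for Keccak-256.
import re

from Crypto.Hash import keccak  # Keccak-256, not hashlib's SHA3-256


def selector(signature: str) -> str:
    '''First 4 bytes of keccak256(<method signature>), hex-encoded.'''
    digest = keccak.new(digest_bits=256, data=signature.encode('ascii'))
    return digest.hexdigest()[:8]


def camel_to_snake(name: str) -> str:
    '''The same conversion the generator applies to ABI method names.'''
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()


# The ERC20 transfer method would be exposed as data_erc20_transfer(...),
# and its well-known selector is a9059cbb.
assert camel_to_snake('transferFrom') == 'transfer_from'
assert selector('transfer(address,uint256)') == 'a9059cbb'
```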
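Similarly, the 5.0-to-10.0 threshold rule in scripts/pylint.sh tracks pylint's own --fail-under option. Below is a minimal sketch of the same gate driven through pylint's programmatic entry point; it is an illustration rather than part of the patch, and it assumes a recent pylint 2.x where lint.Run accepts exit=False and exposes the final score as linter.stats.global_note.

```python
# Minimal sketch, not part of this patch: run pylint from Python and apply
# the same threshold rule that scripts/pylint.sh enforces in bash.
import sys

from pylint import lint


def run_gate(paths, threshold=8.0):
    # scripts/pylint.sh only accepts thresholds between 5.0 and 10.0
    if not 5.0 <= threshold <= 10.0:
        raise ValueError('threshold must be between 5.0 and 10.0, inclusive')
    # exit=False returns control so the final score can be inspected
    result = lint.Run(['--rcfile=.pylintrc', *paths], exit=False)
    score = result.linter.stats.global_note
    print(f'pylint score: {score:.2f}/10.0 (threshold {threshold})')
    return score >= threshold


if __name__ == '__main__':
    sys.exit(0 if run_gate(sys.argv[1:] or ['scripts']) else 1)
```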