From a75b1d9a7930f0666d7416d6a4294e28f3abf6b8 Mon Sep 17 00:00:00 2001
From: Morgan Rockett
Date: Tue, 9 Jul 2024 18:36:58 -0400
Subject: [PATCH] ci: add pylint to the GitHub workflow to enforce code
 quality; fixes #264

This commit was made with the assistance of GitHub Copilot.

Signed-off-by: Morgan Rockett
---
 .github/workflows/ci.yml                      |  37 +-
 .pylintrc                                     | 327 ++++++++++++++++++
 README.md                                     |  76 +++-
 scripts/plot-samples.py                       | 121 +++++++
 scripts/plot.py                               |  96 -----
 scripts/pylint.sh                             |  50 +++
 .../bench/parsec/evm/contracts/gen_header.py  | 243 +++++++------
 7 files changed, 736 insertions(+), 214 deletions(-)
 create mode 100644 .pylintrc
 create mode 100644 scripts/plot-samples.py
 delete mode 100644 scripts/plot.py
 create mode 100755 scripts/pylint.sh

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cf85442c4..7ec1392a1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,7 +18,7 @@ env:
 jobs:
   build-release:
     name: Build Release Candidate
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     env:
       BUILD_RELEASE: 1
     steps:
@@ -30,7 +30,7 @@ jobs:
       - name: Setup Local Dependencies
         run: ./scripts/setup-dependencies.sh
       - name: Build
-        run: scripts/build.sh
+        run: ./scripts/build.sh
   lint:
     name: Lint
     runs-on: ubuntu-20.04
@@ -43,12 +43,32 @@ jobs:
       - name: Setup Local Dependencies
         run: ./scripts/setup-dependencies.sh
       - name: Build
-        run: scripts/build.sh
+        run: ./scripts/build.sh
       - name: Lint
-        run: scripts/lint.sh
+        run: ./scripts/lint.sh
+  pylint:
+    name: Pylint
+    runs-on: ubuntu-22.04
+    continue-on-error: true
+    timeout-minutes: 10
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: recursive
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Setup Build Env
+        run: sudo ./scripts/install-build-tools.sh
+      - name: Lint with Pylint
+        run: ./scripts/pylint.sh
   unit-and-integration-test:
     name: Unit and Integration Tests
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     timeout-minutes: 30
     steps:
       - uses: actions/checkout@v2
@@ -59,9 +79,9 @@ jobs:
       - name: Setup Local Dependencies
         run: ./scripts/setup-dependencies.sh
      - name: Build
-        run: scripts/build.sh
+        run: ./scripts/build.sh
      - name: Run Unit Tests
-        run: scripts/test.sh
+        run: ./scripts/test.sh
      - name: Shorten SHA
        id: vars
        run: echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
@@ -76,7 +96,7 @@ jobs:
           retention-days: 7
   doxygen:
     name: doxygen
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v2
         with:
@@ -94,3 +114,4 @@ jobs:
           name: OpenCBDC Transaction Processor docs for ${{ steps.vars.outputs.sha_short }}
           path: ./doxygen_generated/html/*
           retention-days: 7
+
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 000000000..1daa6a69d
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,327 @@
+# Documentation:
+# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html
+
+[MAIN]
+
+# Specify a score threshold under which the program will exit with error.
+fail-under=10
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=CVS
+
+# Files or directories matching the regular expression patterns are skipped.
+# The regex matches against base names, not paths. The default value ignores
+# Emacs file locks
+ignore-patterns=^\.#
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use, and will cap the count on Windows to
+# avoid hangs. Set to 0 for parallel processing (the default is 1).
+jobs=0
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.10
+
+# Discover python modules and packages in the file system subtree.
+recursive=yes
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+
+[BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Naming style matching correct class constant names.
+class-const-naming-style=snake_case
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Naming style matching correct constant names.
+const-naming-style=snake_case
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=no
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=^_
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken in consideration only for invalid-name.
+property-classes=abc.abstractproperty
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+
+[CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp,
+                      asyncSetUp,
+                      __post_init__
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method.
+max-args=10
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branches for function / method body.
+max-branches=24
+
+# Maximum number of locals for function / method body.
+max-locals=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of return / yield for function / method body.
+max-returns=10
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
+[FORMAT]
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=79
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=yes
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[MESSAGES CONTROL]
+
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=raw-checker-failed,
+        bad-inline-option,
+        locally-disabled,
+        file-ignored,
+        suppressed-message,
+        useless-suppression,
+        deprecated-pragma,
+        use-symbolic-message-instead,
+        use-implicit-booleaness-not-comparison-to-string,
+        use-implicit-booleaness-not-comparison-to-zero
+
+
+[METHOD_ARGS]
+
+# List of qualified names (i.e., library.method) which require a timeout
+# parameter e.g. 'requests.api.get,requests.api.post'
+timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=7
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+# Let 'consider-using-join' be raised when the separator to join on would be
+# non-empty (resulting in expected fixes of the type: ``"- " + " -
+# ".join(items)``)
+suggest-join-with-non-empty-separator=yes
+
+
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'fatal', 'error', 'warning', 'refactor',
+# 'convention', and 'info' which contain the number of messages in each
+# category, as well as 'statement' which is the total number of statements
+# analyzed. This score is used by the global evaluation report (RP0004).
+evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=yes
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of symbolic message names to ignore for Mixin members.
+ignored-checks-for-mixins=no-member,
+                          not-async-context-manager,
+                          not-context-manager,
+                          attribute-defined-outside-init
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+# Regex pattern to define which classes are considered mixins.
+mixin-class-rgx=.*[Mm]ixin
+
+
+[VARIABLES]
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=yes
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
+
diff --git a/README.md b/README.md
index c066a4df8..c9215a31d 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,11 @@ The design decisions we made to achieve these goals will help inform policy make
 **NOTE:** In cases where there are significant changes to the repository that might need manual intervention down-stream (or other important updates), we will [make a NEWS post](NEWS.md).
 
 # Architecture
+
 We have explored several architectures under two broad categories as follows:
+
 ## UHS-Based Transaction Processor
+
 We explored two system architectures for transaction settlement based on an [unspent transaction output (UTXO)](https://en.wikipedia.org/wiki/Unspent_transaction_output) data model and transaction format.
 Both architectures implement the same schema representing an [unspent hash set (UHS)](https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2018-May/015967.html) abstraction.
 One architecture provides [linearizability](https://en.wikipedia.org/wiki/linearizability) of transactions, whereas the other only provides [serializability](https://en.wikipedia.org/wiki/Serializability).
@@ -27,6 +30,7 @@ By relaxing the ordering constraint, the peak transaction throughput supported b
 Both architectures handle multiple geo-distributed datacenter outages with a [recovery time objective (RTO)](https://en.wikipedia.org/wiki/Disaster_recovery#Recovery_Time_Objective) of under ten seconds and a [recovery point objective (RPO)](https://en.wikipedia.org/wiki/Disaster_recovery#Recovery_Point_Objective) of zero.
 
 There are two UHS-based architectures as follows:
+
 1. "Atomizer" architecture
    - Materializes a total ordering of all transactions settled by the system in a linear sequence of batches.
    - Requires vertical scaling as peak transaction throughput is limited by the performance of a single system component.
@@ -41,9 +45,11 @@ There are two UHS-based architectures as follows:
 Read the [2PC & Atomizer architecture guide](docs/uhs-architectures.md) for a detailed description of the system components and implementation of each architecture.
 
 ## Parallel Architecture for Scalably Executing smart Contracts ("PArSEC")
+
 We built a system with a generic virtual machine layer that is capable of performing parallel executions of smart contracts.
 
 The architecture is composed of two layers:
+
 1. A distributed key-value data store with [ACID](https://en.wikipedia.org/wiki/ACID) database properties
    - This back-end data store is not constrained to any type of data and is agnostic to the execution layer.
 1. A generic virtual machine layer that executes programs (i.e. smart contracts) and uses the distributed key-value data store to record state
@@ -54,6 +60,7 @@ The architecture is composed of two layers:
    - Unmodified smart contracts from the Ethereum ecosystem can be deployed directly onto our EVM implementation.
 
 Read the [PArSEC Architecture Guide](docs/parsec_architecture.md) for more details.
+
 # Contributing
 
 You can [sign up](https://dci.mit.edu/opencbdc-interest) to receive updates from technical working groups and to learn more about our work.
@@ -67,7 +74,9 @@ If you want to dive straight in, take a look at our issue tracker's list of [goo
 1. [Install Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git)
 1. Clone the repository (including submodules)
-   - `git clone --recurse-submodules https://github.com/mit-dci/opencbdc-tx`
+   ```console
+   $ git clone --recurse-submodules https://github.com/mit-dci/opencbdc-tx
+   ```
 
 # Setup the build environment
 
@@ -80,6 +89,11 @@ If you just want to run the system, see "Run the Code" below.
    ```console
    # ./scripts/install-build-tools.sh
    ```
+   Note: Running Homebrew as root on macOS is not supported, so run the script without sudo and enter your password when prompted.
+   ```console
+   $ ./scripts/install-build-tools.sh
+   ```
+
 1. Setup project dependencies
 
    This script builds and installs a local copy of several build-dependencies which are not widely packaged. Because it installs to a local, configurable prefix (defaulting to `./prefix`), it does not need root permissions to run.
@@ -93,20 +107,26 @@ If you just want to run the system, see "Run the Code" below.
    ```
 
 ## macOS
+
 Note that if you have not already installed the xcode cli tools you will need to:
 
 ```console
 # xcode-select --install
 ```
+
 # Run the Code
 
 The API Reference is now housed in [an external repository](https://github.com/mit-dci/opencbdc-tx-pages/). See the [live deployment](https://mit-dci.github.io/opencbdc-tx-pages/) to browse.
 
 ## UHS-based Architectures (2PC & Atomizer)
+
 See the [2PC & Atomizer User Guide](docs/2pc_atomizer_user_guide.md)
+
 ## PArSEC Architecture
+
 See the [PArSEC User Guide](docs/parsec_user_guide.md)
+
 # Testing
 
 Running Unit & Integration Tests
@@ -123,6 +143,7 @@ Running Unit & Integration Tests
 ## E2E Testing with Kubernetes
 
 ### Requirements
+
 - Go (go test library used to run tests)
 - Minikube
 - Helm
@@ -130,6 +151,53 @@ Running Unit & Integration Tests
 
 ### Running tests
 
-1. `./scripts/build-docker.sh`
-1. `./scripts/test-e2e-minikube.sh`
-1. Review results and logs at `testruns/<timestamp>/`
+```console
+$ ./scripts/build-docker.sh
+```
+
+```console
+$ ./scripts/test-e2e-minikube.sh
+```
+
+Review results and logs at `testruns/<timestamp>/`
+
+## Linting
+
+### General
+
+The general lint script checks for newlines at the end of all tracked git
+files except images, then runs clang-format and clang-tidy on `.cpp` files
+in the following directories: `src`, `tests`, `cmake-tests`, and `tools`.
+
+```console
+$ ./scripts/lint.sh
+```
+
+### Python
+
+Lint all Python files according to the ruleset defined in `.pylintrc`.
+An optional code-quality value between 5.0 and 10.0 (inclusive) can be
+passed as the threshold of failure.
+
+```console
+$ ./scripts/pylint.sh 8.0
+```
+
+## Virtual Environment for Python
+
+`./scripts/install-build-tools.sh` creates a virtual environment.
+Once it has run, follow these steps to run Python code.
+
+1. Activate the virtual environment, which has the required Python version and packages installed.
+   ```console
+   $ source ./scripts/activate-venv.sh
+   ```
+
+2. Run Python code
+   ```console
+   (.py_venv) $ python ./scripts/<script_name>.py
+   ```
+
+3. Exit the virtual environment
+   ```console
+   (.py_venv) $ deactivate
+   ```
diff --git a/scripts/plot-samples.py b/scripts/plot-samples.py
new file mode 100644
index 000000000..d070d1ded
--- /dev/null
+++ b/scripts/plot-samples.py
@@ -0,0 +1,121 @@
+import sys
+import os.path
+import glob
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def parse_args():
+    '''
+    Allow the user to specify the directory containing the performance data.
+    Example usage: python plot-samples.py -d <tests_dir>
+    Generates plots for all tx_samples in the specified directory.
+    '''
+    parser = argparse.ArgumentParser(
+        description='Plot performance data from tx_samples')
+    # help message for the directory argument
+    parser.add_argument('-d', '--dir', dest='tests_dir',
+                        action='store', default='.', type=str,
+                        help='Directory containing performance data')
+    return parser.parse_args()
+
+
+def plot_latency(fname, fig=None, ax1=None, ax2=None):
+    '''
+    Plot the throughput and latency data from a file in the
+    tx_samples directory - called by scripts/native-system-benchmark.sh
+    (str, Figure, Axes, Axes) -> None
+    '''
+    x, y, th_moving_avg, rates, tx_vals = [], [], [], [], []
+    fresh = False  # is this the plot of all data, or just a single plot
+
+    if not fig:  # create new axes if necessary
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
+        fig.suptitle(fname + " performance data")
+        fresh = True
+
+    local_file = fname.split('/')[-1].split('_')
+    filename = f"loadgen_{local_file[-1].split('.')[0]}"
+
+    # get data from file
+    data = read_in_data(fname)
+    # first sample in file is reference time 0
+    time_start = int(data[0].split()[0])
+
+    # Format data for plotting
+    t_prev = 0
+    for idx, line in enumerate(data):
+        fields = line.split()
+        if len(fields) < 2:
+            break
+        x.append((int(fields[0]) - time_start)/10**9)
+        y.append(int(fields[1])/10**9)
+        if x[idx] - x[t_prev] > 1:
+            tx_vals.append(x[idx])
+            rates.append(idx - t_prev)
+            th_moving_avg.append(np.mean(rates))
+            t_prev = idx
+
+    # get line of best fit
+    f1, f2 = np.polyfit(x, y, 1)
+    f1 = round(f1, 3)
+    f2 = round(f2, 3)
+
+    # plot latency data
+    ax2.set_title("Tx delay (s) vs time since start (s)")
+    ax2.plot(x, y, label=f'{filename}: data')
+    sign = '+ ' if f2 > 0 else ''
+    label = f"{filename}: Line of best fit: {f1}(sec) {sign}{f2}"
+    ax2.plot(np.array(x), f1*np.array(x)+f2, label=label)
+    ax2.legend(loc="upper right")
+    ax2.set(xlabel="Time (s)", ylabel="Latency (s)")
+
+    # plot throughput data
+    ax1.set_title("Throughput (TX/s) vs. time (s)")
+    ax1.plot(tx_vals, rates, label="Throughput")
+    ax1.plot(tx_vals, th_moving_avg, label="(Moving) Average Throughput")
+    ax1.legend(loc="upper right")
+    ax1.set(xlabel="Time (s)", ylabel="Throughput (TX/s)")
+    if fresh:
+        fig.savefig(f"{filename}_performance.png")
+
+
+def read_in_data(fname) -> list:
+    '''
+    get data from file and return as a list of lines
+    '''
+    if not os.path.isfile(fname):
+        print(f'File {fname} does not exist')
+        sys.exit(1)
+
+    lines = []
+    try:
+        with open(fname, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+    except IOError as e:
+        print(f'Error reading from file {fname}\n{e}\n')
+        sys.exit(1)
+
+    return lines
+
+
+if __name__ == '__main__':
+
+    args = parse_args()
+    tests_dir = args.tests_dir
+
+    # Get all tx sample files in the test directory
+    f_list = glob.glob(f'{tests_dir}/tx_samples_*.txt')
+    if not f_list:
+        print(f'No tx_samples files found in {tests_dir = }')
+        sys.exit(1)
+
+    global_fig, global_axs = plt.subplots(1, 2, figsize=(12, 5))
+
+    for file in f_list:
+        plot_latency(file)
+        plot_latency(file, global_fig, global_axs[0], global_axs[1])
+
+    global_fig.savefig(f'{tests_dir}/aggregate_performance.png')
diff --git a/scripts/plot.py b/scripts/plot.py
deleted file mode 100644
index 855dcdef7..000000000
--- a/scripts/plot.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import os.path
-import glob
-import sys
-
-# Usage: python plot.py <path>
-# generate plots from tx_samples
-# list, Axes -> void
-def plot_latency(fname, fig=None, ax1=None, ax2=None):
-    x = []
-    y = []
-    th_moving_avg = []
-    rates = []
-    tx_vals = []
-    fresh = False # is this the plot of all data, or just a single plot
-
-    if (fig == None): # create new axes if necessary
-        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
-        fig.suptitle(fname+" performance data")
-        fresh = True
-    local_file = fname.split('/')
-    local_file = local_file[-1].split('_')
-    id = local_file[-1].split('.')[0]
-    nm = 'loadgen_' + id
-
-    # get data from file
-    data = read_in_data(fname)
-    # first sample in file is reference time 0
-    time_start = int(data[0].split()[0])
-
-    queue = []
-    queue_max = 15
-
-    t_prev = 0
-
-    # Format data for plotting
-    for i in range(len(data)):
-        d = data[i].split()
-        if (len(d) < 2):
-            break
-        a = data[i].split()
-        x.append((int(a[0]) - time_start)/10**9)
-        y.append(int(a[1])/10**9)
-        if (x[i] - x[t_prev] > 1):
-            tx_vals.append(x[i])
-            rates.append(i-t_prev)
-            th_moving_avg.append(np.mean(rates))
-            t_prev = i
-
-    # get line of best fit
-    f1, f2 = np.polyfit(x, y, 1)
-    f1 = round(f1, 3)
-    f2 = round(f2, 3)
-
-    # plot latency data
-    ax2.set_title("Tx delay (s) vs time since start (s)")
-    string = nm + ': data'
-    ax2.plot(x, y, label=string)
-    sign = '+ ' if f2 > 0 else ''
-    string = "Line of best fit: " + str(f1) + "(sec) " + sign + str(f2)
-    string = nm + ': ' + string
-    ax2.plot(np.array(x), f1*np.array(x)+f2, label=string)
-    ax2.legend(loc="upper right")
-    ax2.set(xlabel="Time (s)", ylabel="Latency (s)")
-
-    # plot throughput data
-    ax1.set_title("Throughput (TX/s) vs. time (s)")
-    ax1.plot(tx_vals, rates, label="Throughput")
-    ax1.plot(tx_vals, th_moving_avg, label="(Moving) Average Throughput")
-    ax1.legend(loc="upper right")
-    ax1.set(xlabel="Time (s)", ylabel="Throughput (TX/s)")
-    if (fresh):
-        fig.savefig(nm + "_performance.png")
-
-# get data from file
-def read_in_data(fname):
-    if (not os.path.isfile(fname)):
-        raise Exception("Cannot find file " + fname)
-    fin = open(fname, "r")
-    data = fin.readlines()
-    fin.close()
-    return data
-
-
-if __name__ == '__main__':
-    path = "."
-    # Get path to test data
-    if (len(sys.argv) > 1):
-        path = str(sys.argv[1])
-    f_list = glob.glob(path + '/tx_samples_*.txt')
-    global_fig, global_axs = plt.subplots(1, 2, figsize=(12, 5))
-    for fin in f_list:
-        plot_latency(fin)
-        plot_latency(fin, global_fig, global_axs[0], global_axs[1])
-    global_fig.savefig(path + "/aggregate_performance.png")
diff --git a/scripts/pylint.sh b/scripts/pylint.sh
new file mode 100755
index 000000000..9a31cc8d1
--- /dev/null
+++ b/scripts/pylint.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+ROOT="$(cd "$(dirname "$0")"/.. && pwd)"
+PREFIX="${ROOT}"/prefix
+MIN_CODE_QUALITY=8.0
+
+get_code_score() {
+    if [ -n "$1" ]; then
+        # set minimum quality to user input (int/float) if provided
+        # and (5.0 <= input <= 10.0)
+        if [[ $1 =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+            if (( $(echo "$1 >= 5.0" | bc -l) )) && (( $(echo "$1 <= 10.0" | bc -l) )); then
+                MIN_CODE_QUALITY=$1
+            else
+                # In the future, we want code quality to be at minimum 8.0/10.0
+                echo "Code quality score must be between 5.0 and 10.0, inclusive."
+                echo "Recommended code quality score is >= 8.0."
+                exit 1
+            fi
+        else
+            echo "Code quality score must be an integer or floating point number."
+            exit 1
+        fi
+    fi
+    echo "Linting Python code with minimum quality of $MIN_CODE_QUALITY/10.0..."
+}
+
+check_pylint() {
+    if ! command -v pylint &>/dev/null; then
+        echo "pylint is not installed."
+        echo "Run 'sudo ./scripts/install-build-tools.sh' to install pylint."
+        exit 1
+    fi
+}
+
+get_code_score "$1"
+if source "${ROOT}/scripts/activate-venv.sh"; then
+    echo "Virtual environment activated."
+else
+    echo "Failed to activate virtual environment."
+    exit 1
+fi
+
+check_pylint
+if ! pylint scripts src tests tools --rcfile=.pylintrc \
+    --fail-under="$MIN_CODE_QUALITY" $(git ls-files '*.py'); then
+    echo "Linting failed, please fix the issues and rerun."
+    exit 1
+else
+    echo "Linting passed."
+fi
diff --git a/tools/bench/parsec/evm/contracts/gen_header.py b/tools/bench/parsec/evm/contracts/gen_header.py
index e77c9a9fd..d8b8a8712 100644
--- a/tools/bench/parsec/evm/contracts/gen_header.py
+++ b/tools/bench/parsec/evm/contracts/gen_header.py
@@ -2,15 +2,16 @@
 # Federal Reserve Bank of Boston
 # Distributed under the MIT software license, see the accompanying
 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
-import json
 import os
+import json
 import re
+import sys
 
-# Conversion method from camelCase to snake_case
-snake_convert_pattern = re.compile(r'(?<!^)(?=[A-Z])')
-def to_snake(name):
-    return snake_convert_pattern.sub('_', name).lower()
-
-# This creates a method data_<contract>_<method> for generating the input data necessary to
-# call the given method on the contract
-contracts = {'artifacts/contracts/ERC20.sol/Token.json':'erc20'}
-
-# Load the JSON outputs of the hardhat compilation for each contract we want
-# to include in the header file
-loaded_contracts = {}
-for k, v in contracts.items():
-    with open(k) as f:
-        loaded_contracts[v] = json.load(f)
-
-# Make sure our output folder exists
-if not os.path.exists('cpp_header'):
-    os.makedirs('cpp_header')
-
-with open('cpp_header/contracts.hpp', 'w+') as f:
-    # Write the standard copyright header in the header file
-    f.write('// Copyright (c) 2022 MIT Digital Currency Initiative,\n')
-    f.write('// Federal Reserve Bank of Boston\n')
-    f.write('// Distributed under the MIT software license, see the accompanying\n')
-    f.write('// file COPYING or http://www.opensource.org/licenses/mit-license.php.\n\n')
-    f.write('#ifndef OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n')
-    f.write('#define OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
-    f.write('#include "util/common/buffer.hpp"\n\n')
-    f.write('#include "parsec/agent/runners/evm/hash.hpp"\n\n')
-
-    # The first 4 bytes of the input data sent to a contract are the method
-    # selector in ETH. It is the first 4 bytes of keccak256(<method signature>)
-    f.write('namespace cbdc::parsec::evm_contracts {\n')
-
-
-    # The first 4 bytes of the input data sent to a contract are the method
-    # selector in ETH. It is the first 4 bytes of keccak256(<method signature>)
-    f.write('    static constexpr size_t selector_size = 4;\n')
-
-    # Parameters in a method call are always 32 bytes
-    f.write('    static constexpr size_t param_size = 32;\n\n')
-
-    # Because parameters are 32 bytes, addresses need to be copied at a 12 bytes
-    # offset
-    f.write('    static constexpr size_t address_param_offset = 12; // in ABIs addresses are also 32 bytes\n')
-
-    # Generate methods for all contracts
-    for k, v in loaded_contracts.items():
-        # The data needed to deploy the contract, which is essentially the
-        # byte code parameter in the compiled asset JSON
-        f.write('    auto data_{}_deploy() -> cbdc::buffer {{\n'.format(k))
-        f.write('        auto buf = cbdc::buffer::from_hex("{}");\n'.format(v['bytecode'][2:]))
-        f.write('        return buf.value();\n')
-        f.write('    }\n\n')
-
-        # Loop over the functions in the ABI
-        for abi in v['abi']:
-            # Only make methods for functions, ignore events (for now)
-            if abi['type'] == 'function':
-                # Write the method name data_<contract>_<method>
-                f.write('auto data_{}_{}('.format(k, to_snake(abi['name'])))
-
-                # Write all parameters as function arguments
-                inp_idx = 0
-                for inp in abi['inputs']:
-                    tp = 'bytes32'
-                    if inp['type'] == 'uint256':
-                        tp = 'uint256be'
-                    if inp['type'] == 'address':
-                        tp = 'address'
-                    if inp_idx > 0:
-                        f.write(', ')
-                    f.write('evmc::{} {}'.format(tp, to_snake(inp['name'])))
-                    inp_idx = inp_idx + 1
-
-                # Write the return method and creation of the empty buffer
-                f.write(') -> cbdc::buffer {\n')
-                f.write('        auto buf = cbdc::buffer();\n')
-
-                # Write the method selector calculation
-                f.write('        const auto selector_{name} = std::string("{name_raw}('.format_map(dict({'name':to_snake(abi['name']),'name_raw':abi['name']})))
-                inp_idx = 0
-                for inp in abi['inputs']:
-                    if inp_idx > 0:
-                        f.write(',')
-                    f.write(inp['type'])
-                    inp_idx = inp_idx + 1
-                f.write(')");\n')
-
-                # Write calculation of the selector hash and appending it to the buffer
-                f.write('        auto selector_hash = cbdc::keccak_data(selector_{name}.data(), selector_{name}.size());\n'.format_map(dict({'name':to_snake(abi['name'])})))
-                f.write('        buf.append(selector_hash.data(), selector_size);\n')
-
-                # Write code that appends the parameters to the buffer (if any)
-                if len(abi['inputs']) > 0:
-                    for i, inp in enumerate(abi['inputs']):
+contracts_dict = {'artifacts/contracts/ERC20.sol/Token.json':'erc20'}
+
+# helper functions
+def create_loaded_contracts(contracts: dict) -> dict:
+    '''
+    Load the JSON outputs of the hardhat compilation for
+    each contract we want to include in the header file
+    '''
+    loaded_contracts = {}
+    contracts_read = 0
+    for k, v in contracts.items():
+        try:
+            with open(k, 'r', encoding='utf-8') as file:
+                loaded_contracts[v] = json.load(file)
+                contracts_read += 1
+        except FileNotFoundError:
+            print(f'File {k} not found, skipping')
+            continue
+        except IOError:
+            print(f'Error reading {k}, skipping')
+            continue
+
+    if contracts_read == 0:
+        print('No contracts loaded, exiting')
+        sys.exit(1)
+
+    return loaded_contracts
+
+def camel_to_snake(name) -> str:
+    '''
+    Function to convert camelCase to snake_case
+    '''
+    snake_convert_pattern = re.compile(r'(?<!^)(?=[A-Z])')
+    return snake_convert_pattern.sub('_', name).lower()
+
+def write_header_file(loaded_contracts: dict) -> None:
+    '''
+    Function to write the header file
+    '''
+    # Make sure our output folder exists
+    output_folder = 'cpp_header'
+    output_file = f'{output_folder}/contracts.hpp'
+    os.makedirs(output_folder, exist_ok=True)
+
+    with open(output_file, 'w+', encoding='utf-8') as f:
+        # Write the standard copyright header in the header file
+        for line in copyright_license:
+            f.write(f'{line}\n')
+        f.write('\n')
+
+        f.write('#ifndef OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n')
+        f.write('#define OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
+        f.write('#include "util/common/buffer.hpp"\n\n')
+        f.write('#include "parsec/agent/runners/evm/hash.hpp"\n\n')
+
+        # Write the namespace for the contracts
+        f.write('namespace cbdc::parsec::evm_contracts {\n')
+
+        # The first 4 bytes of the input data sent to a contract are the
+        # method selector in ETH. It is the first 4 bytes of
+        # keccak256(<method signature>)
+        f.write('    static constexpr size_t selector_size = 4;\n')
+
+        # Parameters in a method call are always 32 bytes
+        f.write('    static constexpr size_t param_size = 32;\n\n')
+
+        # Since params are 32 bytes, addrs must be copied at a 12 bytes offset
+        f.write('    static constexpr size_t address_param_offset = 12; // in ABIs addresses are also 32 bytes\n')
+
+        # Generate methods for all contracts
+        for k, v in loaded_contracts.items():
+            # The data needed to deploy the contract, which is essentially the
+            # byte code parameter in the compiled asset JSON
+            f.write(f'    auto data_{k}_deploy() -> cbdc::buffer {{\n')
+            f.write(f'        auto buf = cbdc::buffer::from_hex("{v["bytecode"][2:]}");\n')
+            f.write('        return buf.value();\n')
+            f.write('    }\n\n')
+
+            # Loop over the functions in the ABI
+            for abi in v['abi']:
+                # Only make methods for functions, ignore events (for now)
+                if abi['type'] == 'function':
+                    # Write the method name data_<contract>_<method>
+                    f.write(f'auto data_{k}_{camel_to_snake(abi["name"])}(')
+                    # Write all parameters as function arguments
+                    for idx, inp in enumerate(abi['inputs']):
+                        tp = 'bytes32'
+                        if inp['type'] == 'uint256':
+                            tp = 'uint256be'
                         if inp['type'] == 'address':
-                        f.write('        buf.extend(address_param_offset);\n')
-                        f.write('        buf.append({name}.bytes, sizeof({name}.bytes));\n'.format_map(dict({'name':to_snake(inp['name'])})))
-
-                # Return the buffer we built
-                f.write('        return buf;\n')
-                f.write('    }\n\n')
-
-    f.write('}\n\n')
-    f.write('#endif // OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
-
+                            tp = 'address'
+                        if idx > 0:
+                            f.write(', ')
+                        f.write(f'evmc::{tp} {camel_to_snake(inp["name"])}')
+
+                    # Write the return method and creation of the empty buffer
+                    f.write(') -> cbdc::buffer {\n')
+                    f.write('        auto buf = cbdc::buffer();\n')
+
+                    # Write the method selector calculation
+                    f.write('        const auto selector_{name} = std::string("{name_raw}('.format_map(dict({'name':camel_to_snake(abi['name']),'name_raw':abi['name']})))
+                    for idx, inp in enumerate(abi['inputs']):
+                        if idx > 0:
+                            f.write(',')
+                        f.write(inp['type'])
+                    f.write(')");\n')
+
+                    # Write calculation of the selector hash and appending it to the buffer
+                    f.write('        auto selector_hash = cbdc::keccak_data(selector_{name}.data(), selector_{name}.size());\n'.format_map(dict({'name':camel_to_snake(abi['name'])})))
+                    f.write('        buf.append(selector_hash.data(), selector_size);\n')
+
+                    # Write code that appends the params to the buffer (if any)
+                    if len(abi['inputs']) > 0:
+                        for inp in abi['inputs']:
+                            if inp['type'] == 'address':
+                                f.write('        buf.extend(address_param_offset);\n')
+                            f.write('        buf.append({name}.bytes, sizeof({name}.bytes));\n'.format_map(dict({'name':camel_to_snake(inp['name'])})))
+                    f.write('        return buf;\n    }\n\n')
+
+        f.write('}\n\n')
+        f.write('#endif // OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n')
+
+
+if __name__ == '__main__':
+
+    # Load the contracts
+    loaded_contracts_dict = create_loaded_contracts(contracts_dict)
+
+    # Write the header file
+    write_header_file(loaded_contracts_dict)
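For reference, the selector encoding that gen_header.py bakes into the generated C++ header can be sanity-checked from Python. The following sketch is illustrative only and not part of this patch; it assumes the third-party pycryptodome package for Keccak-256 (Ethereum's keccak256 differs from hashlib's sha3_256), and it reuses the same camelCase-to-snake_case pattern the generator applies to ABI names.

```python
# Illustrative sketch, not part of this patch. Assumes the third-party
# pycryptodome package (pip install pycryptodome) for Keccak-256.
import re

from Crypto.Hash import keccak  # Keccak-256, not hashlib's SHA3-256


def selector(signature: str) -> str:
    '''First 4 bytes of keccak256(<method signature>), hex-encoded.'''
    digest = keccak.new(digest_bits=256, data=signature.encode('ascii'))
    return digest.hexdigest()[:8]


def camel_to_snake(name: str) -> str:
    '''The same conversion the generator applies to ABI method names.'''
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()


# The ERC20 transfer method would be exposed as data_erc20_transfer(...),
# and its well-known selector is a9059cbb.
assert camel_to_snake('transferFrom') == 'transfer_from'
assert selector('transfer(address,uint256)') == 'a9059cbb'
```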
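Similarly, the 5.0-to-10.0 threshold rule in scripts/pylint.sh tracks pylint's own --fail-under option. Below is a minimal sketch of the same gate driven through pylint's programmatic entry point; it is an illustration rather than part of the patch, and it assumes a recent pylint 2.x where lint.Run accepts exit=False and exposes the final score as linter.stats.global_note.

```python
# Minimal sketch, not part of this patch: run pylint from Python and apply
# the same threshold rule that scripts/pylint.sh enforces in bash.
import sys

from pylint import lint


def run_gate(paths, threshold=8.0):
    # scripts/pylint.sh only accepts thresholds between 5.0 and 10.0
    if not 5.0 <= threshold <= 10.0:
        raise ValueError('threshold must be between 5.0 and 10.0, inclusive')
    # exit=False returns control so the final score can be inspected
    result = lint.Run(['--rcfile=.pylintrc', *paths], exit=False)
    score = result.linter.stats.global_note
    print(f'pylint score: {score:.2f}/10.0 (threshold {threshold})')
    return score >= threshold


if __name__ == '__main__':
    sys.exit(0 if run_gate(sys.argv[1:] or ['scripts']) else 1)
```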