Skip to content

Commit

Permalink
Add a SARIF output formatter (#1113)
Browse files Browse the repository at this point in the history
This commit adds a formatter that outputs JSON in a specific
SARIF format according to spec at [1].

This code is largely leveraged from an existing implementation
found here [2].

SARIF format is very useful for integration into ecosystems such
as GitHub's Actions.

[1] https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html
[2] https://github.com/microsoft/bandit-sarif-formatter

Closes #646

Signed-off-by: Eric Brown <[email protected]>
  • Loading branch information
ericwb authored Mar 8, 2024
1 parent b603dce commit 22b4226
Show file tree
Hide file tree
Showing 8 changed files with 530 additions and 1 deletion.
1 change: 1 addition & 0 deletions bandit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
from bandit.core.issue import * # noqa
from bandit.core.test_properties import * # noqa

__author__ = metadata.metadata("bandit")["Author"]
__version__ = metadata.version("bandit")
372 changes: 372 additions & 0 deletions bandit/formatters/sarif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,372 @@
# Copyright (c) Microsoft. All Rights Reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Note: this code is mostly incorporated from
# https://github.com/microsoft/bandit-sarif-formatter
#
r"""
===============
SARIF formatter
===============
This formatter outputs the issues in SARIF formatted JSON.
:Example:
.. code-block:: javascript
{
"runs": [
{
"tool": {
"driver": {
"name": "Bandit",
"organization": "PyCQA",
"rules": [
{
"id": "B101",
"name": "assert_used",
"properties": {
"tags": [
"security",
"external/cwe/cwe-703"
],
"precision": "high"
},
"helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html"
}
],
"version": "1.7.8",
"semanticVersion": "1.7.8"
}
},
"invocations": [
{
"executionSuccessful": true,
"endTimeUtc": "2024-03-05T03:28:48Z"
}
],
"properties": {
"metrics": {
"_totals": {
"loc": 1,
"nosec": 0,
"skipped_tests": 0,
"SEVERITY.UNDEFINED": 0,
"CONFIDENCE.UNDEFINED": 0,
"SEVERITY.LOW": 1,
"CONFIDENCE.LOW": 0,
"SEVERITY.MEDIUM": 0,
"CONFIDENCE.MEDIUM": 0,
"SEVERITY.HIGH": 0,
"CONFIDENCE.HIGH": 1
},
"./examples/assert.py": {
"loc": 1,
"nosec": 0,
"skipped_tests": 0,
"SEVERITY.UNDEFINED": 0,
"SEVERITY.LOW": 1,
"SEVERITY.MEDIUM": 0,
"SEVERITY.HIGH": 0,
"CONFIDENCE.UNDEFINED": 0,
"CONFIDENCE.LOW": 0,
"CONFIDENCE.MEDIUM": 0,
"CONFIDENCE.HIGH": 1
}
}
},
"results": [
{
"message": {
"text": "Use of assert detected. The enclosed code will be removed when compiling to optimised byte code."
},
"level": "note",
"locations": [
{
"physicalLocation": {
"region": {
"snippet": {
"text": "assert True\n"
},
"endColumn": 12,
"endLine": 1,
"startColumn": 1,
"startLine": 1
},
"artifactLocation": {
"uri": "examples/assert.py"
},
"contextRegion": {
"snippet": {
"text": "assert True\n"
},
"endLine": 1,
"startLine": 1
}
}
}
],
"properties": {
"issue_confidence": "HIGH",
"issue_severity": "LOW"
},
"ruleId": "B101",
"ruleIndex": 0
}
]
}
],
"version": "2.1.0",
"$schema": "https://json.schemastore.org/sarif-2.1.0.json"
}
.. versionadded:: 1.7.8
""" # noqa: E501
import logging
import pathlib
import sys
import urllib.parse as urlparse
from datetime import datetime

import sarif_om as om
from jschema_to_python.to_json import to_json

import bandit
from bandit.core import docs_utils

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Passed as ``schema_uri`` when the SARIF log object is built in ``report``.
SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json"
# Passed as ``version`` when the SARIF log object is built in ``report``.
SCHEMA_VER = "2.1.0"
# ``strftime`` pattern used for the invocation end time; the trailing "Z" is
# a literal character, so the formatted time itself must already be in UTC.
TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def report(manager, fileobj, sev_level, conf_level, lines=-1):
    """Prints issues in SARIF format

    Builds a SARIF log with a single run, records skipped files as tool
    configuration notifications, adds one result per reported issue, and
    writes the serialized log to ``fileobj``.

    :param manager: the bandit manager object
    :param fileobj: The output file object, which may be sys.stdout; it is
        used as a context manager and is therefore closed after writing
    :param sev_level: Filtering severity level
    :param conf_level: Filtering confidence level
    :param lines: Number of lines to report, -1 for all; accepted as part of
        the formatter signature but not used by this formatter
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import block.
    from datetime import timezone

    log = om.SarifLog(
        schema_uri=SCHEMA_URI,
        version=SCHEMA_VER,
        runs=[
            om.Run(
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name="Bandit",
                        organization=bandit.__author__,
                        semantic_version=bandit.__version__,
                        version=bandit.__version__,
                    )
                ),
                invocations=[
                    om.Invocation(
                        # datetime.utcnow() is deprecated since Python 3.12
                        # and returns a naive value; an aware UTC "now"
                        # formats to exactly the same string.
                        end_time_utc=datetime.now(timezone.utc).strftime(
                            TS_FORMAT
                        ),
                        execution_successful=True,
                    )
                ],
                properties={"metrics": manager.metrics.data},
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    # Files that could not be scanned are surfaced as notifications.
    skips = manager.get_skipped()
    add_skipped_file_notifications(skips, invocation)

    issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level)

    add_results(issues, run)

    serialized_log = to_json(log)

    with fileobj:
        fileobj.write(serialized_log)

    # Only announce the destination when it is not standard output.
    if fileobj.name != sys.stdout.name:
        LOG.info("SARIF output written to file: %s", fileobj.name)


def add_skipped_file_notifications(skips, invocation):
    """Record every skipped file on the invocation as a notification.

    :param skips: iterable of ``(file_name, reason)`` pairs; ``None`` or an
        empty collection leaves the invocation untouched
    :param invocation: the invocation object whose
        ``tool_configuration_notifications`` list is extended
    """
    if not skips:
        return

    # Create the list lazily so an invocation without skips keeps ``None``.
    if invocation.tool_configuration_notifications is None:
        invocation.tool_configuration_notifications = []

    invocation.tool_configuration_notifications.extend(
        om.Notification(
            level="error",
            message=om.Message(text=why),
            locations=[
                om.Location(
                    physical_location=om.PhysicalLocation(
                        artifact_location=om.ArtifactLocation(
                            uri=to_uri(skipped_path)
                        )
                    )
                )
            ],
        )
        for skipped_path, why in skips
    )


def add_results(issues, run):
    """Append one result per issue to the run and register the rules used.

    :param issues: iterable of issues to convert into results
    :param run: the run object; ``run.results`` is created if it is ``None``
        and ``run.tool.driver.rules`` is set only when at least one rule was
        collected
    """
    if run.results is None:
        run.results = []

    # Both mappings are keyed by rule id and filled while results are built.
    known_rules = {}
    known_indices = {}
    run.results.extend(
        create_result(issue, known_rules, known_indices) for issue in issues
    )

    if known_rules:
        run.tool.driver.rules = list(known_rules.values())


def create_result(issue, rules, rule_indices):
    """Build the result object for a single issue.

    :param issue: issue object exposing ``as_dict()``
    :param rules: mapping of rule id to rule object, updated when the issue
        refers to a rule that has not been seen yet
    :param rule_indices: mapping of rule id to its index, updated alongside
        ``rules``
    :return: the result object describing the issue
    """
    details = issue.as_dict()

    rule, rule_index = create_or_find_rule(details, rules, rule_indices)

    location = om.PhysicalLocation(
        artifact_location=om.ArtifactLocation(uri=to_uri(details["filename"]))
    )
    # Fills in ``region`` (and ``context_region`` when code is available).
    add_region_and_context_region(
        location,
        details["line_range"],
        details["col_offset"],
        details["end_col_offset"],
        details["code"],
    )

    message = om.Message(text=details["issue_text"])
    level = level_from_severity(details["issue_severity"])
    extra_properties = {
        "issue_confidence": details["issue_confidence"],
        "issue_severity": details["issue_severity"],
    }

    return om.Result(
        rule_id=rule.id,
        rule_index=rule_index,
        message=message,
        level=level,
        locations=[om.Location(physical_location=location)],
        properties=extra_properties,
    )


def level_from_severity(severity):
    """Translate a severity name into a result level.

    :param severity: severity name such as "HIGH", "MEDIUM" or "LOW"
    :return: "error" for "HIGH", "note" for "LOW", and "warning" for
        "MEDIUM" as well as for any other value
    """
    if severity == "HIGH":
        return "error"
    if severity == "LOW":
        return "note"
    # "MEDIUM" and every unrecognised severity share the same level.
    return "warning"


def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    """Attach the issue region, and its surrounding context, to a location.

    :param physical_location: location object that receives the ``region``
        attribute and, when ``code`` is available, ``context_region``
    :param line_range: non-empty sequence of line numbers covered by the
        issue; the first entry is the start line, the last entry the end line
    :param col_offset: start column offset of the issue
    :param end_col_offset: end column offset of the issue
    :param code: numbered code excerpt in the format understood by
        ``parse_code``, or a falsy value when no code is available
    """
    if code:
        first_line_number, snippet_lines = parse_code(code)
        # The excerpt may start before the issue; pick the issue's first line.
        snippet_line = snippet_lines[line_range[0] - first_line_number]
        snippet = om.ArtifactContent(text=snippet_line)
    else:
        snippet = None

    physical_location.region = om.Region(
        start_line=line_range[0],
        # Use the last entry, not the second one: when the range lists every
        # covered line (e.g. [3, 4, 5]) the second entry is not the end line.
        # For one- and two-element ranges this is the same value as before.
        end_line=line_range[-1],
        # Offsets are shifted by one; presumably this converts 0-based
        # offsets into 1-based SARIF columns -- confirm against the caller.
        start_column=col_offset + 1,
        end_column=end_col_offset + 1,
        snippet=snippet,
    )

    if code:
        # The context region spans the whole excerpt that was supplied.
        physical_location.context_region = om.Region(
            start_line=first_line_number,
            end_line=first_line_number + len(snippet_lines) - 1,
            snippet=om.ArtifactContent(text="".join(snippet_lines)),
        )


def parse_code(code):
    """Split a numbered code excerpt into its start line and text lines.

    Every line of ``code`` is expected to look like ``"<number> <text>"``.

    :param code: the numbered excerpt, with lines separated by newlines
    :return: a tuple of the number parsed from the first line (0 if there
        are no lines) and the list of line texts with the number prefix
        removed; each text ends in a newline, except the last one when
        ``code`` itself does not end in a newline
    """
    numbered_lines = code.split("\n")

    # A trailing newline leaves an empty final element after the split;
    # drop it and remember that the last real line was newline-terminated.
    ends_with_newline = numbered_lines[-1] == ""
    if ends_with_newline:
        numbered_lines.pop()

    first_line_number = 0
    snippet_lines = []
    for position, numbered_line in enumerate(numbered_lines):
        fields = numbered_line.split(" ", 1)
        if position == 0:
            # Only the first prefix is parsed; later ones are discarded.
            first_line_number = int(fields[0])
        snippet_lines.append(fields[1] + "\n")

    if not ends_with_newline:
        # Undo the newline that was appended to the unterminated last line.
        snippet_lines[-1] = snippet_lines[-1][:-1]

    return first_line_number, snippet_lines


def create_or_find_rule(issue_dict, rules, rule_indices):
    """Return the rule for an issue, creating and registering it if needed.

    :param issue_dict: dictionary form of the issue; ``test_id`` identifies
        the rule
    :param rules: mapping of rule id to rule object, extended for new rules
    :param rule_indices: mapping of rule id to the rule's index, extended
        for new rules
    :return: a tuple of the rule object and its index
    """
    rule_id = issue_dict["test_id"]

    if rule_id not in rules:
        descriptor = om.ReportingDescriptor(
            id=rule_id,
            name=issue_dict["test_name"],
            help_uri=docs_utils.get_url(rule_id),
            properties={
                "tags": [
                    "security",
                    f"external/cwe/cwe-{issue_dict['issue_cwe'].get('id')}",
                ],
                "precision": issue_dict["issue_confidence"].lower(),
            },
        )
        # The index is the number of rules registered before this one.
        rule_indices[rule_id] = len(rules)
        rules[rule_id] = descriptor

    return rules[rule_id], rule_indices[rule_id]


def to_uri(file_path):
    """Convert a file path into the URI form used in the report.

    :param file_path: the path to convert
    :return: the result of ``as_uri()`` for an absolute path; otherwise the
        path with forward slashes and special characters %-encoded
    """
    path = pathlib.PurePath(file_path)
    if not path.is_absolute():
        # as_posix() swaps backslashes for slashes before %-encoding.
        return urlparse.quote(path.as_posix())
    return path.as_uri()
5 changes: 5 additions & 0 deletions doc/source/formatters/sarif.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-----
sarif
-----

.. automodule:: bandit.formatters.sarif
2 changes: 1 addition & 1 deletion doc/source/man/bandit.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ OPTIONS
(-l for LOW, -ll for MEDIUM, -lll for HIGH)
-i, --confidence report only issues of a given confidence level or
higher (-i for LOW, -ii for MEDIUM, -iii for HIGH)
-f {csv,custom,html,json,screen,txt,xml,yaml}, --format {csv,custom,html,json,screen,txt,xml,yaml}
-f {csv,custom,html,json,sarif,screen,txt,xml,yaml}, --format {csv,custom,html,json,sarif,screen,txt,xml,yaml}
specify output format
--msg-template MSG_TEMPLATE
specify output message template (only usable with
Expand Down
Loading

0 comments on commit 22b4226

Please sign in to comment.