Skip to content

Commit

Permalink
for FINDING, allow other URIs besides http and https
Browse files Browse the repository at this point in the history
  • Loading branch information
TheTechromancer committed Nov 29, 2023
1 parent c7510d5 commit 967bdb5
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 15 deletions.
8 changes: 1 addition & 7 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,12 +722,6 @@ def sanitize_data(self, data):


class DictEvent(BaseEvent):
def sanitize_data(self, data):
url = data.get("url", "")
if url:
self.parsed = validators.validate_url_parsed(url)
return data

def _data_human(self):
return json.dumps(self.data, sort_keys=True)

Expand Down Expand Up @@ -1018,7 +1012,7 @@ class _data_validator(BaseModel):
host: str
description: str
url: Optional[str] = None
_validate_url = field_validator("url")(validators.validate_url)
_validate_url = field_validator("url")(validators.validate_uri)
_validate_host = field_validator("host")(validators.validate_host)

def _pretty_string(self):
Expand Down
7 changes: 7 additions & 0 deletions bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@
)
url_regexes = list(re.compile(r, re.I) for r in _url_regexes)

# Generic URI patterns: any alphanumeric scheme followed by "://", then a host
# in one of three forms (DNS name, bare hostname, or bracketed IPv6 literal),
# an optional ":port" of 1-5 digits, and an optional tail starting at the
# first "/" or "?".
_uri_regexes = tuple(
    r"[a-z0-9]+://" + _host + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?"
    for _host in (
        _dns_name_regex,
        _hostname_regex,
        r"\[" + _ipv6_regex + r"\]",
    )
)
# Compiled case-insensitively, so upper-case schemes and hosts match as well.
uri_regexes = [re.compile(_pattern, re.I) for _pattern in _uri_regexes]

_double_slash_regex = r"/{2,}"
double_slash_regex = re.compile(_double_slash_regex)

Expand Down
31 changes: 23 additions & 8 deletions bbot/core/helpers/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,19 @@ def validate_url_parsed(url: str):
return clean_url(url)


@validator
def validate_uri(uri: str):
    """
    Validate a URI and return it as a string.

    Delegates to ``validate_uri_parsed`` and serializes the parsed result
    back into a string with ``geturl()``.

    Args:
        uri: The URI to validate, e.g. ``"FTP://evilcorp.com:2121"``.

    Returns:
        The cleaned URI string, e.g. ``"ftp://evilcorp.com:2121"``.

    Raises:
        ValidationError: Propagated from ``validate_uri_parsed`` when the
            input does not look like a URI (NOTE(review): the ``@validator``
            decorator may alter how this surfaces -- confirm).
    """
    parsed_uri = validate_uri_parsed(uri)
    return parsed_uri.geturl()


@validator
def validate_uri_parsed(uri: str):
    """
    Validate a URI and return it in parsed, cleaned form.

    The input is coerced to ``str`` and stripped of surrounding whitespace,
    then checked against each pattern in ``regexes.uri_regexes``. If any
    pattern matches, the URI is handed to ``clean_url`` in URI mode
    (``uri=True``).

    Args:
        uri: The URI to validate; any scheme is accepted as long as one of
            the URI patterns matches.

    Returns:
        Whatever ``clean_url(uri, uri=True)`` returns (presumably a
        ``urllib.parse`` result object, since callers invoke ``.geturl()``
        on it -- confirm).

    Raises:
        ValidationError: If none of the URI patterns match the input.
    """
    candidate = str(uri).strip()
    for pattern in regexes.uri_regexes:
        # match() anchors at the start only; one matching pattern is enough
        if pattern.match(candidate):
            return clean_url(candidate, uri=True)
    raise ValidationError(f'Invalid URI: "{candidate}"')


@validator
def validate_severity(severity: str):
severity = str(severity).strip().upper()
Expand All @@ -158,7 +171,7 @@ def validate_email(email: str):
raise ValidationError(f'Invalid email: "{email}"')


def clean_url(url: str):
def clean_url(url: str, uri=False):
"""
Cleans and normalizes a URL. This function removes the query string and fragment,
lowercases the netloc, and removes redundant port numbers.
Expand All @@ -181,18 +194,20 @@ def clean_url(url: str):
"""
parsed = parse_url(url)
parsed = parsed._replace(netloc=str(parsed.netloc).lower(), fragment="", query="")
hostname = validate_host(parsed.hostname)
try:
scheme = parsed.scheme
except ValueError:
scheme = "https"
port = None
with suppress(Exception):
port = parsed.port
if port is None:
port = 80 if scheme == "http" else 443
hostname = validate_host(parsed.hostname)
# remove ports if they're redundant
if (scheme == "http" and port == 80) or (scheme == "https" and port == 443):
port = None
if not uri:
if port is None:
port = 80 if scheme == "http" else 443
# remove ports if they're redundant
if (scheme == "http" and port == 80) or (scheme == "https" and port == 443):
port = None
# special case for IPv6 URLs
netloc = make_netloc(hostname, port)
# urlparse is special - it needs square brackets even if there's no port
Expand All @@ -202,7 +217,7 @@ def clean_url(url: str):
# normalize double slashes
parsed = parsed._replace(path=regexes.double_slash_regex.sub("/", parsed.path))
# append / if path is empty
if parsed.path == "":
if parsed.path == "" and not uri:
parsed = parsed._replace(path="/")
return parsed

Expand Down
12 changes: 12 additions & 0 deletions bbot/test/test_step_1/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,18 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https
assert helpers.validators.soft_validate("!@#$", "url") == False
with pytest.raises(ValueError):
helpers.validators.validate_url("!@#$")
assert helpers.validators.soft_validate("http://evilcorp.com", "url") == True
assert helpers.validators.soft_validate("ftp://evilcorp.com", "url") == False
# uris
assert helpers.validators.soft_validate("http://evilcorp.com", "uri") == True
assert helpers.validators.soft_validate("ftp://evilcorp.com", "uri") == True
assert helpers.validators.validate_uri("FTP://evilcorp.com") == "ftp://evilcorp.com"
assert helpers.validators.validate_uri("FTP://evilcorp.com:2121") == "ftp://evilcorp.com:2121"
uri_finding = scan.make_event(
{"host": "evilcorp.com", "url": "ftp://evilcorp.com", "description": "asdf"}, "FINDING", source=scan.root_event
)
assert uri_finding is not None
assert uri_finding.data["url"] == "ftp://evilcorp.com"
# severities
assert helpers.validators.validate_severity(" iNfo") == "INFO"
assert helpers.validators.soft_validate(" iNfo", "severity") == True
Expand Down

0 comments on commit 967bdb5

Please sign in to comment.