Skip to content

Commit

Permalink
Merge pull request #945 from blacklanternsecurity/urlparse-fix
Browse files (browse the repository at this point in the history)
Improve URL Excavation
  • Loading branch information
TheTechromancer authored Dec 28, 2023
2 parents a4c5190 + b35280f commit 8e0a682
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
15 changes: 12 additions & 3 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@ def report(self, result, name, event, **kwargs):


class URLExtractor(BaseExtractor):
url_path_regex = r"((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]+)*/?)"
regexes = {
"fullurl": r"(?i)" + r"(\w{2,15})://((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]+)*/?)",
"fulluri": r"(?i)" + r"([a-z]\w{1,15})://" + url_path_regex,
"fullurl": r"(?i)" + r"(https?)://" + url_path_regex,
"a-tag": r"<a\s+(?:[^>]*?\s+)?href=([\"'])(.*?)\1",
"script-tag": r"<script\s+(?:[^>]*?\s+)?src=([\"'])(.*?)\1",
}
Expand Down Expand Up @@ -119,7 +121,7 @@ async def _search(self, content, event, **kwargs):
# yield to event loop
await self.excavate.helpers.sleep(0)
for result in regex.findall(content):
if name == "fullurl":
if name.startswith("full"):
protocol, other = result
result = f"{protocol}://{other}"

Expand All @@ -145,7 +147,14 @@ async def _search(self, content, event, **kwargs):
yield result, name

def report(self, result, name, event, **kwargs):
parsed_uri = self.excavate.helpers.urlparse(result)
parsed_uri = None
try:
parsed_uri = self.excavate.helpers.urlparse(result)
except Exception as e:
self.excavate.debug(f"Error parsing URI {result}: {e}")
netloc = getattr(parsed_uri, "netloc", None)
if netloc is None:
return
host, port = self.excavate.helpers.split_host_port(parsed_uri.netloc)
# Handle non-HTTP URIs (ftp, s3, etc.)
if not "http" in parsed_uri.scheme.lower():
Expand Down
11 changes: 11 additions & 0 deletions bbot/test/test_step_2/module_tests/test_module_excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,14 @@ async def setup_before_prep(self, module_test):

def check(self, module_test, events):
    """Assert that at least one event's data equals the expected hostname."""
    wanted_host = "test.asdf.fakedomain"
    assert any(event.data == wanted_host for event in events)


class TestExcavateURL(TestExcavate):
    """Excavate test where the URL is glued directly onto preceding text."""

    async def setup_before_prep(self, module_test):
        """Serve a root page whose body has a URL with no separator before it."""
        # The scheme immediately follows other word characters ("...DATAhttps://").
        root_handler = module_test.httpserver.expect_request("/")
        root_handler.respond_with_data("SomeSMooshedDATAhttps://asdffoo.test.notreal/some/path")

    def check(self, module_test, events):
        """Assert that both the hostname and the full URL appear as event data."""

        def was_emitted(expected):
            # True when at least one event carries exactly this data value.
            return any(event.data == expected for event in events)

        assert was_emitted("asdffoo.test.notreal")
        assert was_emitted("https://asdffoo.test.notreal/some/path")

0 comments on commit 8e0a682

Please sign in to comment.