Skip to content

Commit

Permalink
Merge pull request #1308 from blacklanternsecurity/url-excavation-bug…
Browse files Browse the repository at this point in the history
…fixes

URL excavation bugfixes
  • Loading branch information
TheTechromancer authored Apr 25, 2024
2 parents eba16ae + 74358f6 commit 7965129
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
1 change: 1 addition & 0 deletions bbot/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ url_extension_blacklist:
- woff
- woff2
- ttf
- eot
- sass
- scss
# audio
Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ async def report(self, result, name, event, **kwargs):


class URLExtractor(BaseExtractor):
url_path_regex = r"((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]+)*/?)"
url_path_regex = r"((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)"
regexes = {
"fulluri": r"(?i)" + r"([a-z]\w{1,15})://" + url_path_regex,
"fullurl": r"(?i)" + r"(https?)://" + url_path_regex,
Expand Down
26 changes: 26 additions & 0 deletions bbot/test/test_step_2/module_tests/test_module_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,29 @@ def check(self, module_test, events):
if e.type.startswith("DNS_NAME") and e.data == "www.evilcorp.com" and "affiliate" in e.tags
]
)


class TestHTTPX_URLBlacklist(ModuleTestBase):
    """Spider a mock site whose index page links to files with mixed extensions.

    The index page links to ``.aspx``, ``.svg``, ``.woff2`` and ``.txt``
    files. The test expects only the index, the ``.aspx`` and the ``.txt``
    URLs to show up as ``URL`` events, and expects no URL-type event at all
    to mention ``.svg`` or ``.woff``.
    """

    targets = ["http://127.0.0.1:8888"]
    modules_overrides = ["httpx", "speculate", "excavate"]
    # Generous spider limits so the links found on the index page are in reach.
    config_overrides = {"web_spider_distance": 10, "web_spider_depth": 10}

    async def setup_after_prep(self, module_test):
        """Register the index page ("/") on the test HTTP server."""
        index_page = """
<a href="/test.aspx"/>
<a href="/test.svg"/>
<a href="/test.woff2"/>
<a href="/test.txt"/>
"""
        module_test.httpserver.expect_request("/").respond_with_data(index_page)

    def check(self, module_test, events):
        """Assert the expected event counts and the absence of filtered URLs."""

        def matching(event_type, data=None):
            # Events of exactly this type, optionally narrowed to exact data.
            return [e for e in events if e.type == event_type and (data is None or e.data == data)]

        # Overall event counts per type.
        assert len(matching("URL_UNVERIFIED")) == 4
        assert len(matching("HTTP_RESPONSE")) == 3
        assert len(matching("URL")) == 3

        # Each URL that should have been visited appears exactly once.
        assert len(matching("URL", "http://127.0.0.1:8888/")) == 1
        assert len(matching("URL", "http://127.0.0.1:8888/test.aspx")) == 1
        assert len(matching("URL", "http://127.0.0.1:8888/test.txt")) == 1

        # No URL-flavoured event (URL, URL_UNVERIFIED, ...) may carry these extensions.
        for extension in (".svg", ".woff"):
            assert not any(e for e in events if "URL" in e.type and extension in e.data)

0 comments on commit 7965129

Please sign in to comment.