Skip to content

Commit

Permalink
fix excavate bug
Browse files (browse the repository at this point in the history)
  • Loading branch information
github-actions committed Aug 22, 2024
1 parent 5f0db14 commit 9eb9152
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 11 deletions.
2 changes: 1 addition & 1 deletion bbot/core/helpers/web/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def AsyncClient(self, *args, **kwargs):
try:
return self.web_clients[retries]
except KeyError:
log.critical('CREATING CLIENT')
log.critical("CREATING CLIENT")
from .client import BBOTAsyncClient

client = BBOTAsyncClient.from_config(self.config, self.target, *args, **kwargs)
Expand Down
16 changes: 13 additions & 3 deletions bbot/core/helpers/web/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(self, parent_helper):
self.web_config = self.config.get("web", {})
self.web_spider_depth = self.web_config.get("spider_depth", 1)
self.web_spider_distance = self.web_config.get("spider_distance", 0)
self.web_clients = {}
self.target = self.preset.target
self.ssl_verify = self.config.get("ssl_verify", False)
engine_debug = self.config.get("engine", {}).get("debug", False)
Expand All @@ -64,9 +65,18 @@ def __init__(self, parent_helper):
)

def AsyncClient(self, *args, **kwargs):
from .client import BBOTAsyncClient

return BBOTAsyncClient.from_config(self.config, self.target, *args, persist_cookies=False, **kwargs)
# cache by retries to prevent unwanted accumulation of clients
# (they are not garbage-collected)
retries = kwargs.get("retries", 1)
try:
return self.web_clients[retries]
except KeyError:
log.critical("CREATING CLIENT")
from .client import BBOTAsyncClient

client = BBOTAsyncClient.from_config(self.config, self.target, *args, persist_cookies=False, **kwargs)
self.web_clients[client.retries] = client
return client

async def request(self, *args, **kwargs):
"""
Expand Down
12 changes: 6 additions & 6 deletions bbot/test/test_step_2/module_tests/test_module_excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def setup_before_prep(self, module_test):
\\nhttps://www1.test.notreal
\\x3dhttps://www2.test.notreal
%0ahttps://www3.test.notreal
\\u000ahttps://www4.test.notreal
\\u000ahttps://www4.test.notreal:
\nwww5.test.notreal
\\x3dwww6.test.notreal
%0awww7.test.notreal
Expand Down Expand Up @@ -989,17 +989,17 @@ def check(self, module_test, events):
assert file.is_file(), "Destination file doesn't exist"
assert open(file).read() == self.pdf_data, f"File at {file} does not contain the correct content"
raw_text_events = [e for e in events if e.type == "RAW_TEXT"]
assert 1 == len(raw_text_events), "Failed to emmit RAW_TEXT event"
assert 1 == len(raw_text_events), "Failed to emit RAW_TEXT event"
assert (
raw_text_events[0].data == self.unstructured_response
), f"Text extracted from PDF is incorrect, got {raw_text_events[0].data}"
email_events = [e for e in events if e.type == "EMAIL_ADDRESS"]
assert 1 == len(email_events), "Failed to emmit EMAIL_ADDRESS event"
assert 1 == len(email_events), "Failed to emit EMAIL_ADDRESS event"
assert (
email_events[0].data == "example@blacklanternsecurity.notreal"
), f"Email extracted from unstructured text is incorrect, got {email_events[0].data}"
finding_events = [e for e in events if e.type == "FINDING"]
assert 2 == len(finding_events), "Failed to emmit FINDING events"
assert 2 == len(finding_events), "Failed to emit FINDING events"
assert any(
e.type == "FINDING"
and "JWT" in e.data["description"]
Expand All @@ -1008,7 +1008,7 @@ def check(self, module_test, events):
and e.data["path"].endswith("http-127-0-0-1-8888-test-pdf.pdf")
and str(e.host) == "127.0.0.1"
for e in finding_events
), f"Failed to emmit JWT event got {finding_events}"
), f"Failed to emit JWT event got {finding_events}"
assert any(
e.type == "FINDING"
and "DOTNET" in e.data["description"]
Expand All @@ -1017,7 +1017,7 @@ def check(self, module_test, events):
and e.data["path"].endswith("http-127-0-0-1-8888-test-pdf.pdf")
and str(e.host) == "127.0.0.1"
for e in finding_events
), f"Failed to emmit serialized event got {finding_events}"
), f"Failed to emit serialized event got {finding_events}"
assert finding_events[0].data["path"] == str(file), "File path not included in finding event"
url_events = [e.data for e in events if e.type == "URL_UNVERIFIED"]
assert (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def check(self, module_test, events):
assert file.is_file(), "Destination file doesn't exist"
assert open(file).read() == self.pdf_data, f"File at {file} does not contain the correct content"
raw_text_events = [e for e in events if e.type == "RAW_TEXT"]
assert 1 == len(raw_text_events), "Failed to emmit RAW_TEXT event"
assert 1 == len(raw_text_events), "Failed to emit RAW_TEXT event"
assert (
raw_text_events[0].data == self.unstructured_response
), f"Text extracted from PDF is incorrect, got {raw_text_events[0].data}"

0 comments on commit 9eb9152

Please sign in to comment.