Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Excavate from RAW_TEXT events #1636

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit. Hold Shift + click to select a range.
b9c5de5
Add `raw_text` to excavate and change unstructured discovery context
domwhewell-sage Aug 6, 2024
98b0b3b
Add tests and stop newlines from printing in debug logs
domwhewell-sage Aug 6, 2024
430a631
Include test.notreal and localhost in tests
domwhewell-sage Aug 6, 2024
8b80e54
Fix unstructured
domwhewell-sage Aug 7, 2024
a40da6a
Make `RAW_TEXT` a dict event
domwhewell-sage Aug 7, 2024
0b490f6
Revert "Make `RAW_TEXT` a dict event"
domwhewell-sage Aug 7, 2024
0fe898e
Add a check to ensure event is a `dict`
domwhewell-sage Aug 7, 2024
32cdcbc
Test *almost* all Excavate rules
domwhewell-sage Aug 7, 2024
3b67328
Fix final_url not defined error
domwhewell-sage Aug 7, 2024
3e6109b
lint
domwhewell-sage Aug 7, 2024
2a1fed4
Increase scope distance
domwhewell-sage Aug 7, 2024
1198d9b
Merge branch 'dev' into excavate_raw_text
domwhewell-sage Aug 16, 2024
5c985aa
Only add url to `FINDING` event if it exists in the source_event
domwhewell-sage Aug 16, 2024
c48a5c6
Add the parents path to the `FINDING` / `VULNERABILITY`
domwhewell-sage Aug 16, 2024
4abd67c
Get the url from the event dict
domwhewell-sage Aug 16, 2024
b03df80
Check if parent data is a dictionary
domwhewell-sage Aug 16, 2024
121462c
Improved tests and fixed parent event
domwhewell-sage Aug 16, 2024
48aae2e
Made changes to test. *crosses fingers
domwhewell-sage Aug 16, 2024
6b4217b
D'oh
domwhewell-sage Aug 16, 2024
4df4d4a
Correct tests for `URL_UNVERIFIED` events
domwhewell-sage Aug 16, 2024
672c790
bro, do you even unit test?
domwhewell-sage Aug 16, 2024
e735daa
better host inheritance
invalid-email-address Aug 16, 2024
92dc6a5
Merge remote-tracking branch 'upstream/better-host-inheritance' into …
domwhewell-sage Aug 17, 2024
147579d
Remove getting parent.path in excavate as the `FINDING` does that for us
domwhewell-sage Aug 17, 2024
8048e70
if there is an event host from `HTTP_RESPONSE` then set this in the e…
domwhewell-sage Aug 17, 2024
a0a8f32
clean up
invalid-email-address Aug 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bbot/core/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def _infinite_retry(self, callback, *args, **kwargs):
try:
return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval)
except (TimeoutError, asyncio.exceptions.TimeoutError):
self.log.debug(f"{self.name}: Timeout after {interval:,} seconds{context}, retrying...")
self.log.debug(f"{self.name}: Timeout after {interval:,} seconds {context}, retrying...")
retries += 1
if max_retries is not None and retries > max_retries:
raise TimeoutError(f"Timed out after {max_retries*interval:,} seconds {context}")
Expand Down
58 changes: 29 additions & 29 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,15 +335,6 @@ def host_original(self):
return self.host
return self._host_original

@property
def closest_host(self):
"""
Walk up the chain of parents events until we hit the first one with a host
"""
if self.host is not None or self.parent is None or self.parent is self:
return self.host
return self.parent.closest_host

@property
def port(self):
self.host
Expand Down Expand Up @@ -602,7 +593,7 @@ def get_parents(self, omit=False, include_self=False):
return parents

def _host(self):
return None
return ""

def _sanitize_data(self, data):
"""
Expand Down Expand Up @@ -899,7 +890,7 @@ def __hash__(self):

def __str__(self):
max_event_len = 80
d = str(self.data)
d = str(self.data).replace("\n", "\\n")
return f'{self.type}("{d[:max_event_len]}{("..." if len(d) > max_event_len else "")}", module={self.module}, tags={self.tags})'

def __repr__(self):
Expand Down Expand Up @@ -954,30 +945,39 @@ def _host(self):


class ClosestHostEvent(DictHostEvent):
# if a host isn't specified, this event type uses the host from the closest parent
# if a host/path/url isn't specified, this event type grabs it from the closest parent
# inherited by FINDING and VULNERABILITY
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if "host" not in self.data:
closest_host = self.closest_host
if closest_host is None:
raise ValueError("No host was found in event parents. Host must be specified!")
self.data["host"] = str(closest_host)
if not self.host:
for parent in self.get_parents(include_self=True):
# inherit closest URL
if not "url" in self.data:
parent_url = getattr(parent, "parsed_url", None)
if parent_url is not None:
self.data["url"] = parent_url.geturl()
# inherit closest path
if not "path" in self.data and isinstance(parent.data, dict):
parent_path = parent.data.get("path", None)
if parent_path is not None:
self.data["path"] = parent_path
# inherit closest host
if parent.host:
self.data["host"] = str(parent.host)
break
# die if we still haven't found a host
if not self.host:
raise ValueError("No host was found in event parents. Host must be specified!")


class DictPathEvent(DictEvent):
_path_keywords = ["path", "filename"]

def sanitize_data(self, data):
new_data = dict(data)
file_blobs = getattr(self.scan, "_file_blobs", False)
folder_blobs = getattr(self.scan, "_folder_blobs", False)
for path_keyword in self._path_keywords:
blob = None
try:
data_path = Path(data[path_keyword])
except KeyError:
continue
blob = None
try:
data_path = Path(data["path"])
if data_path.is_file():
self.add_tag("file")
if file_blobs:
Expand All @@ -987,10 +987,10 @@ def sanitize_data(self, data):
self.add_tag("folder")
if folder_blobs:
blob = self._tar_directory(data_path)
else:
continue
if blob:
new_data["blob"] = base64.b64encode(blob).decode("utf-8")
except KeyError:
pass
if blob:
new_data["blob"] = base64.b64encode(blob).decode("utf-8")

return new_data

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/gowitness.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ async def handle_batch(self, *events):
url = screenshot["url"]
final_url = screenshot["final_url"]
filename = self.screenshot_path / screenshot["filename"]
webscreenshot_data = {"filename": str(filename), "url": final_url}
webscreenshot_data = {"path": str(filename), "url": final_url}
parent_event = event_dict[url]
await self.emit_event(
webscreenshot_data,
Expand Down
Loading
Loading