Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/better-host-inheritance' into …
Browse files Browse the repository at this point in the history
…excavate_raw_text
  • Loading branch information
domwhewell-sage committed Aug 17, 2024
2 parents 672c790 + e735daa commit 92dc6a5
Show file tree
Hide file tree
Showing 14 changed files with 386 additions and 220 deletions.
2 changes: 1 addition & 1 deletion bbot/core/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def _infinite_retry(self, callback, *args, **kwargs):
try:
return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval)
except (TimeoutError, asyncio.exceptions.TimeoutError):
self.log.debug(f"{self.name}: Timeout after {interval:,} seconds{context}, retrying...")
self.log.debug(f"{self.name}: Timeout after {interval:,} seconds {context}, retrying...")
retries += 1
if max_retries is not None and retries > max_retries:
raise TimeoutError(f"Timed out after {max_retries*interval:,} seconds {context}")
Expand Down
91 changes: 56 additions & 35 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def __init__(
scan=None,
scans=None,
tags=None,
confidence=5,
confidence=100,
timestamp=None,
_dummy=False,
_internal=None,
Expand All @@ -146,7 +146,7 @@ def __init__(
scan (Scan, optional): BBOT Scan object. Required unless _dummy is True. Defaults to None.
scans (list of Scan, optional): BBOT Scan objects, used primarily when unserializing an Event from the database. Defaults to None.
tags (list of str, optional): Descriptive tags for the event. Defaults to None.
confidence (int, optional): Confidence level for the event, on a scale of 1-10. Defaults to 5.
confidence (int, optional): Confidence level for the event, on a scale of 1-100. Defaults to 100.
timestamp (datetime, optional): Time of event discovery. Defaults to current UTC time.
_dummy (bool, optional): If True, disables certain data validations. Defaults to False.
_internal (Any, optional): If specified, makes the event internal. Defaults to None.
Expand Down Expand Up @@ -237,6 +237,27 @@ def __init__(
def data(self):
return self._data

@property
def confidence(self):
    """
    Confidence level currently stored on this event.
    """
    return self._confidence

@confidence.setter
def confidence(self, confidence):
    """
    Store the given value as an integer, clamped to the range 1-100.

    The value is converted with int() first, so anything int() rejects raises here.
    """
    level = int(confidence)
    if level < 1:
        level = 1
    elif level > 100:
        level = 100
    self._confidence = level

@property
def cumulative_confidence(self):
    """
    Confidence of this event, scaled by the cumulative confidence of its parent.

    Useful for filtering out speculative/unreliable events: an event with a confidence
    of 50 whose parent's cumulative confidence is also 50 ends up at 25.
    An event with a confidence of 100 does not consult its parent and reports 100.
    An event with no parent, or one that is its own parent, reports its own confidence.
    """
    own = self._confidence
    # full confidence resets the chain
    if own == 100:
        return own
    parent = self.parent
    # nothing above us to scale by
    if parent is None or parent is self:
        return own
    return int(own * parent.cumulative_confidence / 100)

@property
def resolved_hosts(self):
if is_ip(self.host):
Expand Down Expand Up @@ -314,15 +335,6 @@ def host_original(self):
return self.host
return self._host_original

@property
def closest_host(self):
    """
    Return this event's host; if it has none, defer to the parent's closest_host,
    which walks up the chain of parent events until one with a host is found.
    """
    if self.host is None:
        parent = self.parent
        # only climb when there is a distinct parent to ask
        if parent is not None and parent is not self:
            return parent.closest_host
    return self.host

@property
def port(self):
self.host
Expand Down Expand Up @@ -368,7 +380,7 @@ def discovery_path(self):
This event's full discovery context, including those of all its parents
"""
parent_path = []
if self.parent is not None and self != self.parent:
if self.parent is not None and self.parent is not self:
parent_path = self.parent.discovery_path
return parent_path + [[self.id, self.discovery_context]]

Expand Down Expand Up @@ -472,7 +484,7 @@ def scope_distance(self, scope_distance):
self._scope_distance = new_scope_distance
# apply recursively to parent events
parent_scope_distance = getattr(self.parent, "scope_distance", None)
if parent_scope_distance is not None and self != self.parent:
if parent_scope_distance is not None and self.parent is not self:
self.parent.scope_distance = scope_distance + 1

@property
Expand Down Expand Up @@ -581,7 +593,7 @@ def get_parents(self, omit=False, include_self=False):
return parents

def _host(self):
# Default host value for this event.
# NOTE(review): the two returns below look like the removed/added sides of a diff hunk
# (None -> ""); as written only the first return can ever execute — confirm which is intended.
return None
return ""

def _sanitize_data(self, data):
"""
Expand Down Expand Up @@ -933,30 +945,39 @@ def _host(self):


class ClosestHostEvent(DictHostEvent):
# NOTE(review): this block appears to hold both sides of a diff hunk — an older body that
# reads self.closest_host and a newer body that walks self.get_parents(). Indentation is
# missing, so which lines are live must be confirmed against the real file.
# if a host isn't specified, this event type uses the host from the closest parent
# if a host/path/url isn't specified, this event type grabs it from the closest parent
# inherited by FINDING and VULNERABILITY
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# presumably the pre-change path: fill data["host"] from self.closest_host, or raise if there is none
if "host" not in self.data:
closest_host = self.closest_host
if closest_host is None:
raise ValueError("No host was found in event parents. Host must be specified!")
self.data["host"] = str(closest_host)
# presumably the post-change path: only runs when this event has no truthy host of its own
if not self.host:
for parent in self.get_parents(include_self=True):
# inherit closest URL
if not "url" in self.data:
# parents without a parsed_url attribute yield None here and are passed over for the URL
parent_url = getattr(parent, "parsed_url", None)
if parent_url is not None:
self.data["url"] = parent_url.geturl()
# inherit closest path
# only parents whose data is a dict are consulted for "path"
if not "path" in self.data and isinstance(parent.data, dict):
parent_path = parent.data.get("path", None)
if parent_path is not None:
self.data["path"] = parent_path
# inherit closest host
# the first parent with a truthy host is used and ends the walk
if parent.host:
self.data["host"] = str(parent.host)
break
# die if we still haven't found a host
if not self.host:
raise ValueError("No host was found in event parents. Host must be specified!")


class DictPathEvent(DictEvent):
_path_keywords = ["path", "filename"]

def sanitize_data(self, data):
new_data = dict(data)
file_blobs = getattr(self.scan, "_file_blobs", False)
folder_blobs = getattr(self.scan, "_folder_blobs", False)
for path_keyword in self._path_keywords:
blob = None
try:
data_path = Path(data[path_keyword])
except KeyError:
continue
blob = None
try:
data_path = Path(data["path"])
if data_path.is_file():
self.add_tag("file")
if file_blobs:
Expand All @@ -966,10 +987,10 @@ def sanitize_data(self, data):
self.add_tag("folder")
if folder_blobs:
blob = self._tar_directory(data_path)
else:
continue
if blob:
new_data["blob"] = base64.b64encode(blob).decode("utf-8")
except KeyError:
pass
if blob:
new_data["blob"] = base64.b64encode(blob).decode("utf-8")

return new_data

Expand Down Expand Up @@ -1487,7 +1508,7 @@ def make_event(
scan=None,
scans=None,
tags=None,
confidence=5,
confidence=100,
dummy=False,
internal=None,
):
Expand All @@ -1507,7 +1528,7 @@ def make_event(
scan (Scan, optional): BBOT Scan object associated with the event.
scans (List[Scan], optional): Multiple BBOT Scan objects, primarily used for unserialization.
tags (Union[str, List[str]], optional): Descriptive tags for the event, as a list or a single string.
confidence (int, optional): Confidence level for the event, on a scale of 1-10. Defaults to 5.
confidence (int, optional): Confidence level for the event, on a scale of 1-100. Defaults to 100.
dummy (bool, optional): Disables data validations if set to True. Defaults to False.
internal (Any, optional): Makes the event internal if set to True. Defaults to None.
Expand Down Expand Up @@ -1636,7 +1657,7 @@ def event_from_json(j, siem_friendly=False):
"event_type": event_type,
"scans": j.get("scans", []),
"tags": j.get("tags", []),
"confidence": j.get("confidence", 5),
"confidence": j.get("confidence", 100),
"context": j.get("discovery_context", None),
"dummy": True,
}
Expand Down
Loading

0 comments on commit 92dc6a5

Please sign in to comment.