Skip to content

Commit

Permalink
tag out-of-scope redirects as affiliates
Browse files Browse the repository at this point in the history
  • Loading branch information
TheTechromancer committed Nov 8, 2023
1 parent bcca96b commit 47089b2
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
6 changes: 5 additions & 1 deletion bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,12 +354,16 @@ def source(self, source):
"""
if is_event(source):
self._source = source
hosts_are_same = self.host == source.host
if source.scope_distance >= 0:
new_scope_distance = int(source.scope_distance)
# only increment the scope distance if the host changes
if self.host != source.host:
if not hosts_are_same:
new_scope_distance += 1
self.scope_distance = new_scope_distance
# inherit affiliate tag
if hosts_are_same and "affiliate" in source.tags:
self.add_tag("affiliate")
elif not self._dummy:
log.warning(f"Tried to set invalid source on {self}: (got: {source})")

Expand Down
3 changes: 2 additions & 1 deletion bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,8 @@ async def handle_event(self, event):
scheme = self.helpers.is_uri(location, return_scheme=True)
if scheme in ("http", "https"):
if num_redirects <= self.max_redirects:
url_event = self.make_event(location, "URL_UNVERIFIED", event)
# tag redirects to out-of-scope hosts as affiliates
url_event = self.make_event(location, "URL_UNVERIFIED", event, tags="affiliate")
if url_event is not None:
# inherit web spider distance from parent (don't increment)
source_web_spider_distance = getattr(event, "web_spider_distance", 0)
Expand Down
29 changes: 29 additions & 0 deletions bbot/test/test_step_2/module_tests/test_module_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,32 @@ def check(self, module_test, events):
[e for e in events if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and "status-301" in e.tags]
)
assert 1 == len([e for e in events if e.type == "URL" and e.data == "https://127.0.0.1:9999/"])


class TestHTTPX_Redirect(ModuleTestBase):
    """Verify that a redirect to a host outside the test targets is tagged "affiliate".

    The local test HTTP server answers "/" with a 301 whose Location header
    points at http://www.evilcorp.com, which is not one of ``targets``. The
    check then expects the "affiliate" tag on both the resulting
    URL_UNVERIFIED event and the DNS_NAME* event for that host.
    """

    targets = ["http://127.0.0.1:8888"]
    modules_overrides = ["httpx", "speculate", "excavate"]

    async def setup_after_prep(self, module_test):
        """Register a 301 response for "/" that redirects to www.evilcorp.com."""
        redirect_headers = {"Location": "http://www.evilcorp.com"}
        root_handler = module_test.httpserver.expect_request("/")
        root_handler.respond_with_data("Redirecting...", status=301, headers=redirect_headers)

    def check(self, module_test, events):
        """Assert that each of the three expected events was emitted exactly once."""
        # the request to the target itself, carrying the 301 status tag
        redirecting_urls = [
            ev
            for ev in events
            if ev.type == "URL" and ev.data == "http://127.0.0.1:8888/" and "status-301" in ev.tags
        ]
        assert len(redirecting_urls) == 1

        # the redirect destination, tagged as an affiliate
        affiliate_urls = [
            ev
            for ev in events
            if ev.type == "URL_UNVERIFIED" and ev.data == "http://www.evilcorp.com/" and "affiliate" in ev.tags
        ]
        assert len(affiliate_urls) == 1

        # the hostname of the redirect destination, which must carry the same tag
        affiliate_hostnames = [
            ev
            for ev in events
            if ev.type.startswith("DNS_NAME") and ev.data == "www.evilcorp.com" and "affiliate" in ev.tags
        ]
        assert len(affiliate_hostnames) == 1

0 comments on commit 47089b2

Please sign in to comment.