diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index c24601fef..4e962fdbd 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -354,12 +354,16 @@ def source(self, source): """ if is_event(source): self._source = source + hosts_are_same = self.host == source.host if source.scope_distance >= 0: new_scope_distance = int(source.scope_distance) # only increment the scope distance if the host changes - if self.host != source.host: + if not hosts_are_same: new_scope_distance += 1 self.scope_distance = new_scope_distance + # inherit affiliate tag + if hosts_are_same and "affiliate" in source.tags: + self.add_tag("affiliate") elif not self._dummy: log.warning(f"Tried to set invalid source on {self}: (got: {source})") diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 95a5a848f..04d8000fa 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -352,7 +352,8 @@ async def handle_event(self, event): scheme = self.helpers.is_uri(location, return_scheme=True) if scheme in ("http", "https"): if num_redirects <= self.max_redirects: - url_event = self.make_event(location, "URL_UNVERIFIED", event) + # tag redirects to out-of-scope hosts as affiliates + url_event = self.make_event(location, "URL_UNVERIFIED", event, tags="affiliate") if url_event is not None: # inherit web spider distance from parent (don't increment) source_web_spider_distance = getattr(event, "web_spider_distance", 0) diff --git a/bbot/test/test_step_2/module_tests/test_module_httpx.py b/bbot/test/test_step_2/module_tests/test_module_httpx.py index fcf134dd3..77f3e98b8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_httpx.py +++ b/bbot/test/test_step_2/module_tests/test_module_httpx.py @@ -66,3 +66,32 @@ def check(self, module_test, events): [e for e in events if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and "status-301" in e.tags] ) assert 1 == len([e for e in events if e.type == "URL" and e.data == "https://127.0.0.1:9999/"]) + + +class TestHTTPX_Redirect(ModuleTestBase): + targets = ["http://127.0.0.1:8888"] + modules_overrides = ["httpx", "speculate", "excavate"] + + async def setup_after_prep(self, module_test): + module_test.httpserver.expect_request("/").respond_with_data( + "Redirecting...", status=301, headers={"Location": "http://www.evilcorp.com"} + ) + + def check(self, module_test, events): + assert 1 == len( + [e for e in events if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and "status-301" in e.tags] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL_UNVERIFIED" and e.data == "http://www.evilcorp.com/" and "affiliate" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type.startswith("DNS_NAME") and e.data == "www.evilcorp.com" and "affiliate" in e.tags + ] + )