From 62d7a1680347d2cf0cea34537d0c19b3d7bee606 Mon Sep 17 00:00:00 2001
From: TheTechromancer
Date: Thu, 28 Sep 2023 15:19:00 -0400
Subject: [PATCH] fix wayback URL validation error

collapse_urls() now logs and skips any URL that clean_url() rejects with
ValueError. Without the skip, execution falls through to
hash_url(new_url) with new_url either unbound (first URL in the batch) or
left over from the previous iteration.

---
Note: line breaks and indentation were restored from a
whitespace-collapsed copy of this patch. The post-image blob hash on the
validators.py index line predates the added `continue`; regenerate the
patch with git format-patch if exact hashes matter.

 bbot/core/helpers/validators.py       | 6 +++++-
 bbot/test/test_step_1/test_helpers.py | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/bbot/core/helpers/validators.py b/bbot/core/helpers/validators.py
index 82d7a38d4..b26543e47 100644
--- a/bbot/core/helpers/validators.py
+++ b/bbot/core/helpers/validators.py
@@ -143,7 +143,11 @@ def collapse_urls(urls, threshold=10):
     """
     url_hashes = {}
     for url in urls:
-        new_url = clean_url(url)
+        try:
+            new_url = clean_url(url)
+        except ValueError as e:
+            log.verbose(f"Failed to clean url {url}: {e}")
+            continue
         url_hash = hash_url(new_url)
         try:
             url_hashes[url_hash].add(new_url)
diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py
index abf09cadc..b46a9ad2c 100644
--- a/bbot/test/test_step_1/test_helpers.py
+++ b/bbot/test/test_step_1/test_helpers.py
@@ -39,6 +39,8 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https
     assert helpers.validators.clean_url("http://evilcorp.com/asdf?a=asdf#frag").geturl() == "http://evilcorp.com/asdf"
     assert helpers.validators.clean_url("http://evilcorp.com//asdf").geturl() == "http://evilcorp.com/asdf"
     assert helpers.validators.clean_url("http://evilcorp.com.").geturl() == "http://evilcorp.com/"
+    with pytest.raises(ValueError):
+        helpers.validators.clean_url("http://evilcorp,com")
 
     assert helpers.url_depth("http://evilcorp.com/asdf/user/") == 2
     assert helpers.url_depth("http://evilcorp.com/asdf/user") == 2