Skip to content

Commit

Permalink
fix wayback URL validation error
Browse files (browse the repository at this point in the history)
  • Loading branch information
TheTechromancer committed Sep 28, 2023
1 parent aaa3aba commit 62d7a16
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
5 changes: 4 additions & 1 deletion bbot/core/helpers/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,10 @@ def collapse_urls(urls, threshold=10):
"""
url_hashes = {}
for url in urls:
new_url = clean_url(url)
try:
new_url = clean_url(url)
except ValueError as e:
log.verbose(f"Failed to clean url {url}: {e}")
url_hash = hash_url(new_url)
try:
url_hashes[url_hash].add(new_url)
Expand Down
2 changes: 2 additions & 0 deletions bbot/test/test_step_1/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https
assert helpers.validators.clean_url("http://evilcorp.com/asdf?a=asdf#frag").geturl() == "http://evilcorp.com/asdf"
assert helpers.validators.clean_url("http://evilcorp.com//asdf").geturl() == "http://evilcorp.com/asdf"
assert helpers.validators.clean_url("http://evilcorp.com.").geturl() == "http://evilcorp.com/"
with pytest.raises(ValueError):
helpers.validators.clean_url("http://evilcorp,com")

assert helpers.url_depth("http://evilcorp.com/asdf/user/") == 2
assert helpers.url_depth("http://evilcorp.com/asdf/user") == 2
Expand Down

0 comments on commit 62d7a16

Please sign in to comment.