-
Notifications
You must be signed in to change notification settings - Fork 563
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
don't punycode-encode non-host segments
- Loading branch information
1 parent
5d4d434
commit 7c34499
Showing
9 changed files
with
176 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
import re | ||
import sys | ||
import copy | ||
import idna | ||
import json | ||
import atexit | ||
import codecs | ||
|
@@ -34,7 +35,6 @@ | |
|
||
from .url import * # noqa F401 | ||
from .. import errors | ||
from .punycode import * # noqa F401 | ||
from .logger import log_to_stderr | ||
from . import regexes as bbot_regexes | ||
from .names_generator import random_name, names, adjectives # noqa F401 | ||
|
@@ -898,10 +898,88 @@ def clean_old(d, keep=10, filter=lambda x: True, key=latest_mtime, reverse=True, | |
|
||
|
||
def extract_emails(s):
    """
    Yield each email address found in a body of text, lowercased.

    Args:
        s: The text to search. It is passed through ``smart_decode`` before
            matching (presumably so bytes input is accepted -- confirm
            against that helper).

    Yields:
        str: Every match of ``bbot_regexes.email_regex``, converted to
            lowercase, in the order the regex finds them.
    """
    decoded = smart_decode(s)
    matches = bbot_regexes.email_regex.findall(decoded)
    yield from (address.lower() for address in matches)
|
||
|
||
def extract_host(s):
    """
    Locate the host portion of a string and split the string around it.

    Args:
        s (str): The string from which to extract the host.

    Returns:
        tuple: Three values:
            (hostname (None if not found), string_before_hostname, string_after_hostname).
            When no host is found the whole input is returned as the
            "before" part and the "after" part is empty.

    Examples:
        >>> extract_host("evilcorp.com:80")
        ("evilcorp.com", "", ":80")
        >>> extract_host("http://evilcorp.com:80/asdf.php?a=b")
        ("evilcorp.com", "http://", ":80/asdf.php?a=b")
        >>> extract_host("bob@evilcorp.com")
        ("evilcorp.com", "bob@", "")
        >>> extract_host("[dead::beef]:22")
        ("dead::beef", "[", "]:22")
    """
    found = bbot_regexes.extract_host_regex.search(s)
    if not found:
        return (None, s, "")

    # Everything outside capture group 1 is preserved verbatim as the
    # surrounding "before" / "after" text.
    group_start, group_end = found.span(1)
    hostname = found.group(1)
    before = s[:group_start]
    after = s[group_end:]

    host, port = split_host_port(hostname)
    if host is None:
        # Could not be parsed into host/port: hand back the raw capture.
        return (hostname, before, after)

    hostname = str(host)
    if port is not None:
        # The port was part of the capture, so push it back into the suffix.
        after = f":{port}{after}"
    if is_ip(hostname, version=6):
        # Re-wrap IPv6 literals in brackets so before + hostname + after
        # still forms a well-formed string.
        before = f"{before}["
        after = f"]{after}"
    return (hostname, before, after)
|
||
|
||
def smart_encode_punycode(text: str) -> str:
    """
    Punycode-encode only the host portion of a string.

    ドメイン.テスト --> xn--eckwd4c7c.xn--zckzah

    Args:
        text: A string that may contain a hostname (as located by
            ``extract_host``).

    Returns:
        The input with its host segment IDNA-encoded. The text before and
        after the host is left untouched. If no host is found, or the host
        cannot be encoded, the host is left as it was.
    """
    hostname, prefix, suffix = extract_host(text)
    if hostname is None:
        return text

    encoded = hostname
    try:
        encoded = idna.encode(hostname).decode(errors="ignore")
    except UnicodeError:
        # Best effort: an un-encodable host is passed through unchanged.
        pass

    return f"{prefix}{encoded}{suffix}"
|
||
|
||
def smart_decode_punycode(text: str) -> str:
    """
    Punycode-decode only the host portion of a string.

    xn--eckwd4c7c.xn--zckzah --> ドメイン.テスト

    Args:
        text: A string that may contain a hostname (as located by
            ``extract_host``).

    Returns:
        The input with its host segment IDNA-decoded. The text before and
        after the host is left untouched. If no host is found, or the host
        cannot be decoded, the host is left as it was.
    """
    hostname, prefix, suffix = extract_host(text)
    if hostname is None:
        return text

    decoded = hostname
    try:
        decoded = idna.decode(hostname)
    except UnicodeError:
        # Best effort: an un-decodable host is passed through unchanged.
        pass

    return f"{prefix}{decoded}{suffix}"
|
||
|
||
def can_sudo_without_password(): | ||
""" | ||
Return True if the current user can sudo without a password | ||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -250,38 +250,52 @@ async def test_events(events, scan, helpers, bbot_config): | |
# japanese | ||
assert scan.make_event("ドメイン.テスト", dummy=True).type == "DNS_NAME" | ||
assert scan.make_event("bob@ドメイン.テスト", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("テスト@ドメイン.テスト", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("ドメイン.テスト:80", dummy=True).type == "OPEN_TCP_PORT" | ||
assert scan.make_event("http://ドメイン.テスト:80", dummy=True).type == "URL_UNVERIFIED" | ||
assert scan.make_event("http://ドメイン.テスト:80/テスト", dummy=True).type == "URL_UNVERIFIED" | ||
|
||
assert scan.make_event("xn--eckwd4c7c.xn--zckzah", dummy=True).type == "DNS_NAME" | ||
assert scan.make_event("[email protected]", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("テスト@xn--eckwd4c7c.xn--zckzah", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("xn--eckwd4c7c.xn--zckzah:80", dummy=True).type == "OPEN_TCP_PORT" | ||
assert scan.make_event("http://xn--eckwd4c7c.xn--zckzah:80", dummy=True).type == "URL_UNVERIFIED" | ||
assert scan.make_event("http://xn--eckwd4c7c.xn--zckzah:80/テスト", dummy=True).type == "URL_UNVERIFIED" | ||
|
||
# thai | ||
assert scan.make_event("เราเที่ยวด้วยกัน.com", dummy=True).type == "DNS_NAME" | ||
assert scan.make_event("bob@เราเที่ยวด้วยกัน.com", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("ทดสอบ@เราเที่ยวด้วยกัน.com", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("เราเที่ยวด้วยกัน.com:80", dummy=True).type == "OPEN_TCP_PORT" | ||
assert scan.make_event("http://เราเที่ยวด้วยกัน.com:80", dummy=True).type == "URL_UNVERIFIED" | ||
assert scan.make_event("http://เราเที่ยวด้วยกัน.com:80/ทดสอบ", dummy=True).type == "URL_UNVERIFIED" | ||
|
||
assert scan.make_event("xn--12c1bik6bbd8ab6hd1b5jc6jta.com", dummy=True).type == "DNS_NAME" | ||
assert scan.make_event("[email protected]", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("ทดสอบ@xn--12c1bik6bbd8ab6hd1b5jc6jta.com", dummy=True).type == "EMAIL_ADDRESS" | ||
assert scan.make_event("xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80", dummy=True).type == "OPEN_TCP_PORT" | ||
assert scan.make_event("http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80", dummy=True).type == "URL_UNVERIFIED" | ||
assert scan.make_event("http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80/ทดสอบ", dummy=True).type == "URL_UNVERIFIED" | ||
|
||
# punycode - encoding / decoding tests | ||
|
||
# japanese | ||
assert scan.make_event("xn--eckwd4c7c.xn--zckzah", dummy=True).data == "xn--eckwd4c7c.xn--zckzah" | ||
assert scan.make_event("[email protected]", dummy=True).data == "[email protected]" | ||
assert scan.make_event("テスト@xn--eckwd4c7c.xn--zckzah", dummy=True).data == "テスト@xn--eckwd4c7c.xn--zckzah" | ||
assert scan.make_event("xn--eckwd4c7c.xn--zckzah:80", dummy=True).data == "xn--eckwd4c7c.xn--zckzah:80" | ||
assert scan.make_event("http://xn--eckwd4c7c.xn--zckzah:80", dummy=True).data == "http://xn--eckwd4c7c.xn--zckzah/" | ||
assert ( | ||
scan.make_event("http://xn--eckwd4c7c.xn--zckzah:80/テスト", dummy=True).data | ||
== "http://xn--eckwd4c7c.xn--zckzah/テスト" | ||
) | ||
|
||
assert scan.make_event("ドメイン.テスト", dummy=True).data == "xn--eckwd4c7c.xn--zckzah" | ||
assert scan.make_event("bob@ドメイン.テスト", dummy=True).data == "[email protected]" | ||
assert scan.make_event("テスト@ドメイン.テスト", dummy=True).data == "テスト@xn--eckwd4c7c.xn--zckzah" | ||
assert scan.make_event("ドメイン.テスト:80", dummy=True).data == "xn--eckwd4c7c.xn--zckzah:80" | ||
assert scan.make_event("http://ドメイン.テスト:80", dummy=True).data == "http://xn--eckwd4c7c.xn--zckzah/" | ||
|
||
assert scan.make_event("http://ドメイン.テスト:80/テスト", dummy=True).data == "http://xn--eckwd4c7c.xn--zckzah/テスト" | ||
# thai | ||
assert ( | ||
scan.make_event("xn--12c1bik6bbd8ab6hd1b5jc6jta.com", dummy=True).data == "xn--12c1bik6bbd8ab6hd1b5jc6jta.com" | ||
|
@@ -290,6 +304,10 @@ async def test_events(events, scan, helpers, bbot_config): | |
scan.make_event("[email protected]", dummy=True).data | ||
== "[email protected]" | ||
) | ||
assert ( | ||
scan.make_event("ทดสอบ@xn--12c1bik6bbd8ab6hd1b5jc6jta.com", dummy=True).data | ||
== "ทดสอบ@xn--12c1bik6bbd8ab6hd1b5jc6jta.com" | ||
) | ||
assert ( | ||
scan.make_event("xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80", dummy=True).data | ||
== "xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80" | ||
|
@@ -298,14 +316,23 @@ async def test_events(events, scan, helpers, bbot_config): | |
scan.make_event("http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80", dummy=True).data | ||
== "http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com/" | ||
) | ||
assert ( | ||
scan.make_event("http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80/ทดสอบ", dummy=True).data | ||
== "http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com/ทดสอบ" | ||
) | ||
|
||
assert scan.make_event("เราเที่ยวด้วยกัน.com", dummy=True).data == "xn--12c1bik6bbd8ab6hd1b5jc6jta.com" | ||
assert scan.make_event("bob@เราเที่ยวด้วยกัน.com", dummy=True).data == "[email protected]" | ||
assert scan.make_event("ทดสอบ@เราเที่ยวด้วยกัน.com", dummy=True).data == "ทดสอบ@xn--12c1bik6bbd8ab6hd1b5jc6jta.com" | ||
assert scan.make_event("เราเที่ยวด้วยกัน.com:80", dummy=True).data == "xn--12c1bik6bbd8ab6hd1b5jc6jta.com:80" | ||
assert ( | ||
scan.make_event("http://เราเที่ยวด้วยกัน.com:80", dummy=True).data | ||
== "http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com/" | ||
) | ||
assert ( | ||
scan.make_event("http://เราเที่ยวด้วยกัน.com:80/ทดสอบ", dummy=True).data | ||
== "http://xn--12c1bik6bbd8ab6hd1b5jc6jta.com/ทดสอบ" | ||
) | ||
|
||
# test event serialization | ||
from bbot.core.event import event_from_json | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,6 +108,54 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https | |
"[email protected]", | ||
) | ||
|
||
assert helpers.extract_host("evilcorp.com:80") == ("evilcorp.com", "", ":80") | ||
assert helpers.extract_host("http://evilcorp.com:80/asdf.php?a=b") == ( | ||
"evilcorp.com", | ||
"http://", | ||
":80/asdf.php?a=b", | ||
) | ||
assert helpers.extract_host("http://evilcorp.com:80/[email protected]") == ( | ||
"evilcorp.com", | ||
"http://", | ||
":80/[email protected]", | ||
) | ||
assert helpers.extract_host("[email protected]") == ("evilcorp.com", "bob@", "") | ||
assert helpers.extract_host("[dead::beef]:22") == ("dead::beef", "[", "]:22") | ||
assert helpers.extract_host("scp://[dead::beef]:22") == ("dead::beef", "scp://[", "]:22") | ||
assert helpers.extract_host("https://[dead::beef]:22?a=b") == ("dead::beef", "https://[", "]:22?a=b") | ||
assert helpers.extract_host("https://[dead::beef]/?a=b") == ("dead::beef", "https://[", "]/?a=b") | ||
assert helpers.extract_host("https://[dead::beef]?a=b") == ("dead::beef", "https://[", "]?a=b") | ||
assert helpers.extract_host("ftp://username:[email protected]/my-file.csv") == ( | ||
"my-ftp.com", | ||
"ftp://username:password@", | ||
"/my-file.csv", | ||
) | ||
assert helpers.extract_host("ftp://username:p@[email protected]/my-file.csv") == ( | ||
"my-ftp.com", | ||
"ftp://username:p@ssword@", | ||
"/my-file.csv", | ||
) | ||
assert helpers.extract_host("ftp://username:password:/@my-ftp.com/my-file.csv") == ( | ||
"my-ftp.com", | ||
"ftp://username:password:/@", | ||
"/my-file.csv", | ||
) | ||
assert helpers.extract_host("ftp://username:password:/@dead::beef/my-file.csv") == ( | ||
"my-ftp.com", | ||
"ftp://username:password:/@", | ||
"/my-file.csv", | ||
) | ||
assert helpers.extract_host("ftp://username:password:/@[dead::beef]/my-file.csv") == ( | ||
"dead::beef", | ||
"ftp://username:password:/@[", | ||
"]/my-file.csv", | ||
) | ||
assert helpers.extract_host("ftp://username:password:/@[dead::beef]:22/my-file.csv") == ( | ||
"dead::beef", | ||
"ftp://username:password:/@[", | ||
"]:22/my-file.csv", | ||
) | ||
|
||
assert helpers.split_domain("www.evilcorp.co.uk") == ("www", "evilcorp.co.uk") | ||
assert helpers.split_domain("asdf.www.test.notreal") == ("asdf.www", "test.notreal") | ||
assert helpers.split_domain("www.test.notreal") == ("www", "test.notreal") | ||
|
@@ -120,6 +168,8 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https | |
assert helpers.split_host_port("evilcorp.co.uk") == ("evilcorp.co.uk", None) | ||
assert helpers.split_host_port("d://wat:wat") == ("wat", None) | ||
assert helpers.split_host_port("https://[dead::beef]:8338") == (ipaddress.ip_address("dead::beef"), 8338) | ||
assert helpers.split_host_port("[dead::beef]") == (ipaddress.ip_address("dead::beef"), None) | ||
assert helpers.split_host_port("dead::beef") == (ipaddress.ip_address("dead::beef"), None) | ||
extracted_words = helpers.extract_words("blacklanternsecurity") | ||
assert "black" in extracted_words | ||
# assert "blacklantern" in extracted_words | ||
|
Oops, something went wrong.