Skip to content

Commit

Permalink
better social deduplication
Browse files — browse the repository at this point in the history
  • Loading branch information
TheTechromancer committed Apr 25, 2024
1 parent 64a0c18 commit 1e603fd
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 25 deletions.
38 changes: 21 additions & 17 deletions bbot/modules/social.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,39 @@ class social(BaseModule):
meta = {"description": "Look for social media links in webpages"}
flags = ["passive", "safe", "social-enum"]

social_media_regex = {
"linkedin": r"(?:https?://)?(?:www.)?linkedin.com/(?:in|company)/([a-zA-Z0-9-]+)/?",
"facebook": r"(?:https?://)?(?:www.)?facebook.com/([a-zA-Z0-9.]+)/?",
"twitter": r"(?:https?://)?(?:www.)?twitter.com/([a-zA-Z0-9_]{1,15})/?",
"github": r"(?:https?://)?(?:www.)?github.com/([a-zA-Z0-9_-]+)/?",
"instagram": r"(?:https?://)?(?:www.)?instagram.com/([a-zA-Z0-9_.]+)/?",
"youtube": r"(?:https?://)?(?:www.)?youtube.com/@([a-zA-Z0-9_]+)/?",
"bitbucket": r"(?:https?://)?(?:www.)?bitbucket.org/([a-zA-Z0-9_-]+)/?",
"gitlab": r"(?:https?://)?(?:www.)?gitlab.(?:com|org)/([a-zA-Z0-9_-]+)",
"discord": r"(?:https?://)?(?:www.)?discord.gg/([a-zA-Z0-9_-]+)",
"docker": r"(?:https?://)?hub.docker.com/[ru]/([a-zA-Z0-9_-]+)",
"huggingface": r"(?:https?://)?huggingface.co/([a-zA-Z0-9_-]+)",
# platform name : (regex, case_sensitive)
social_media_platforms = {
"linkedin": (r"linkedin.com/(?:in|company)/([a-zA-Z0-9-]+)", False),
"facebook": (r"facebook.com/([a-zA-Z0-9.]+)", False),
"twitter": (r"twitter.com/([a-zA-Z0-9_]{1,15})", False),
"github": (r"github.com/([a-zA-Z0-9_-]+)", False),
"instagram": (r"instagram.com/([a-zA-Z0-9_.]+)", False),
"youtube": (r"youtube.com/@([a-zA-Z0-9_]+)", False),
"bitbucket": (r"bitbucket.org/([a-zA-Z0-9_-]+)", False),
"gitlab": (r"gitlab.(?:com|org)/([a-zA-Z0-9_-]+)", False),
"discord": (r"discord.gg/([a-zA-Z0-9_-]+)", True),
"docker": (r"hub.docker.com/[ru]/([a-zA-Z0-9_-]+)", False),
"huggingface": (r"huggingface.co/([a-zA-Z0-9_-]+)", False),
}

scope_distance_modifier = 1

async def setup(self):
self.compiled_regexes = {k: re.compile(v) for k, v in self.social_media_regex.items()}
self.compiled_regexes = {k: (re.compile(v), c) for k, (v, c) in self.social_media_platforms.items()}
return True

async def handle_event(self, event):
for platform, regex in self.compiled_regexes.items():
for platform, (regex, case_sensitive) in self.compiled_regexes.items():
for match in regex.finditer(event.data):
url = match.group()
if not url.startswith("http"):
url = f"https://{url}"
profile_name = match.groups()[0]
if not case_sensitive:
url = url.lower()
profile_name = profile_name.lower()
social_event = self.make_event(
{"platform": platform, "url": url, "profile_name": profile_name}, "SOCIAL", source=event
{"platform": platform, "url": f"https://{url}", "profile_name": profile_name},
"SOCIAL",
source=event,
)
social_event.scope_distance = event.scope_distance
await self.emit_event(social_event)
33 changes: 25 additions & 8 deletions bbot/test/test_step_2/module_tests/test_module_social.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,38 @@ async def setup_after_prep(self, module_test):
"response_data": """
<html>
<a href="https://discord.gg/asdf"/><a href="https://github.com/blacklanternsecurity/bbot"/>
<a href="https://hub.docker.com/r/blacklanternsecurity"/>
<a href="https://hub.docker.com/r/blacklanternsecurity/bbot"/>
<a href="https://hub.docker.com/r/blacklanternSECURITY/bbot"/>
</html>
"""
}
module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args)

def check(self, module_test, events):
assert any(
e.type == "SOCIAL" and e.data["platform"] == "discord" and e.data["profile_name"] == "asdf" for e in events
assert 3 == len([e for e in events if e.type == "SOCIAL"])
assert 1 == len(
[
e
for e in events
if e.type == "SOCIAL" and e.data["platform"] == "discord" and e.data["profile_name"] == "asdf"
]
)
assert any(
e.type == "SOCIAL" and e.data["platform"] == "docker" and e.data["profile_name"] == "blacklanternsecurity"
for e in events
assert 1 == len(
[
e
for e in events
if e.type == "SOCIAL"
and e.data["platform"] == "docker"
and e.data["profile_name"] == "blacklanternsecurity"
]
)
assert any(
e.type == "SOCIAL" and e.data["platform"] == "github" and e.data["profile_name"] == "blacklanternsecurity"
for e in events
assert 1 == len(
[
e
for e in events
if e.type == "SOCIAL"
and e.data["platform"] == "github"
and e.data["profile_name"] == "blacklanternsecurity"
]
)

0 comments on commit 1e603fd

Please sign in to comment.