Skip to content

Commit

Permalink
Merge pull request #902 from domwhewell-sage/github-improvements
Browse files Browse the repository at this point in the history
New Module "github_org" + various GitHub improvements
  • Loading branch information
TheTechromancer authored Dec 21, 2023
2 parents 2ffa75c + a358a5d commit e92c2fa
Show file tree
Hide file tree
Showing 16 changed files with 801 additions and 139 deletions.
4 changes: 3 additions & 1 deletion bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def source(self, source):
"""
if is_event(source):
self._source = source
hosts_are_same = self.host == source.host
hosts_are_same = self.host and (self.host == source.host)
if source.scope_distance >= 0:
new_scope_distance = int(source.scope_distance)
# only increment the scope distance if the host changes
Expand Down Expand Up @@ -752,6 +752,8 @@ class ASN(DictEvent):


class CODE_REPOSITORY(DictHostEvent):
_always_emit = True

class _data_validator(BaseModel):
url: str
_validate_url = field_validator("url")(validators.validate_url)
Expand Down
8 changes: 6 additions & 2 deletions bbot/core/helpers/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,12 @@ def __getattribute__(self, attr):
# then try web
return getattr(self.web, attr)
except AttributeError:
# then die
raise AttributeError(f'Helper has no attribute "{attr}"')
try:
# then try validators
return getattr(self.validators, attr)
except AttributeError:
# then die
raise AttributeError(f'Helper has no attribute "{attr}"')


class DummyModule(BaseModule):
Expand Down
10 changes: 7 additions & 3 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,12 +504,13 @@ def is_port(p):
return p and p.isdigit() and 0 <= int(p) <= 65535


def is_dns_name(d):
def is_dns_name(d, include_local=True):
"""
Determines if the given string is a valid DNS name.
Args:
d (str): The string to be checked.
include_local (bool): Consider local hostnames to be valid (hostnames without periods)
Returns:
bool: True if the string is a valid DNS name, False otherwise.
Expand All @@ -519,14 +520,17 @@ def is_dns_name(d):
True
>>> is_dns_name('localhost')
True
>>> is_dns_name('localhost', include_local=False)
False
>>> is_dns_name('192.168.1.1')
False
"""
if is_ip(d):
return False
d = smart_decode(d)
if bbot_regexes.hostname_regex.match(d):
return True
if include_local:
if bbot_regexes.hostname_regex.match(d):
return True
if bbot_regexes.dns_name_regex.match(d):
return True
return False
Expand Down
8 changes: 8 additions & 0 deletions bbot/core/helpers/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,11 @@ def soft_validate(s, t):
return True
except ValueError:
return False


def is_email(email):
    """
    Check whether a value is accepted by ``validate_email``.

    Args:
        email: The value to check.

    Returns:
        bool: True if ``validate_email`` accepts the value, False if it raises ``ValueError``.
    """
    try:
        validate_email(email)
    except ValueError:
        # validation failure is reported as a plain boolean, not an exception
        return False
    else:
        return True
37 changes: 19 additions & 18 deletions bbot/modules/github.py → bbot/modules/github_codesearch.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
from bbot.modules.templates.subdomain_enum import subdomain_enum_apikey
from bbot.modules.templates.github import github


class github(subdomain_enum_apikey):
class github_codesearch(github):
watched_events = ["DNS_NAME"]
produced_events = ["URL_UNVERIFIED"]
produced_events = ["CODE_REPOSITORY", "URL_UNVERIFIED"]
flags = ["passive", "subdomain-enum", "safe"]
meta = {"description": "Query Github's API for related repositories", "auth_required": True}
options = {"api_key": ""}
options_desc = {"api_key": "Github token"}
meta = {"description": "Query Github's API for code containing the target domain name", "auth_required": True}
options = {"api_key": "", "limit": 100}
options_desc = {"api_key": "Github token", "limit": "Limit code search to this many results"}

base_url = "https://api.github.com"
github_raw_url = "https://raw.githubusercontent.com/"

async def setup(self):
ret = await super().setup()
self.headers = {"Authorization": f"token {self.api_key}"}
return ret

async def ping(self):
url = f"{self.base_url}/zen"
response = await self.helpers.request(url)
assert getattr(response, "status_code", 0) == 200
self.limit = self.config.get("limit", 100)
return await super().setup()

async def handle_event(self, event):
query = self.make_query(event)
Expand All @@ -39,6 +33,7 @@ async def query(self, query):
repos = {}
url = f"{self.base_url}/search/code?per_page=100&type=Code&q={self.helpers.quote(query)}&page=" + "{page}"
agen = self.helpers.api_page_iter(url, headers=self.headers, json=False)
num_results = 0
try:
async for r in agen:
if r is None:
Expand All @@ -47,6 +42,8 @@ async def query(self, query):
if status_code == 429:
"Github is rate-limiting us (HTTP status: 429)"
break
if status_code != 200:
break
try:
j = r.json()
except Exception as e:
Expand All @@ -64,10 +61,14 @@ async def query(self, query):
repos[repo_url].append(raw_url)
except KeyError:
repos[repo_url] = [raw_url]
num_results += 1
if num_results >= self.limit:
break
if num_results >= self.limit:
break
finally:
agen.aclose()
return repos

@staticmethod
def raw_url(url):
return url.replace("https://github.com/", "https://raw.githubusercontent.com/").replace("/blob/", "/")
def raw_url(self, url):
return url.replace("https://github.com/", self.github_raw_url).replace("/blob/", "/")
200 changes: 200 additions & 0 deletions bbot/modules/github_org.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
from bbot.modules.templates.github import github


class github_org(github):
    """
    Enumerate GitHub organizations, their repositories and their members.

    Event flow implemented by this module:
        ORG_STUB --> SOCIAL (tagged "github-org") when the stub is an existing org that looks in-scope
        SOCIAL   --> CODE_REPOSITORY for every repository of an org / org member
        SOCIAL   --> SOCIAL (tagged "github-org-member") for every member of an org

    ``self.base_url`` and ``self.headers`` are not defined in this class;
    presumably they are provided by the ``github`` template base class.
    """

    watched_events = ["ORG_STUB", "SOCIAL"]
    produced_events = ["CODE_REPOSITORY"]
    flags = ["passive", "subdomain-enum", "safe"]
    meta = {"description": "Query Github's API for organization and member repositories"}
    options = {"api_key": "", "include_members": True, "include_member_repos": False}
    options_desc = {
        "api_key": "Github token",
        "include_members": "Enumerate organization members",
        "include_member_repos": "Also enumerate organization members' repositories",
    }

    scope_distance_modifier = 2

    async def setup(self):
        """Read the module options, then defer to the parent class' setup."""
        self.include_members = self.config.get("include_members", True)
        self.include_member_repos = self.config.get("include_member_repos", False)
        return await super().setup()

    async def filter_event(self, event):
        """
        Decide whether an incoming event should be handled.

        Returns:
            ``True`` to accept the event, or a ``(False, reason)`` tuple to reject it.
        """
        if event.type == "SOCIAL":
            if event.data.get("platform", "") != "github":
                return False, "event is not a github profile"
            # reject org members if the setting isn't enabled
            # this prevents gathering of org member repos
            if (not self.include_member_repos) and ("github-org-member" in event.tags):
                return False, "include_member_repos is False"
        return True

    async def handle_event(self, event):
        """
        Turn a SOCIAL or ORG_STUB event into CODE_REPOSITORY / SOCIAL events.

        Args:
            event: The watched event (type "SOCIAL" or "ORG_STUB").
        """
        # handle github profile
        if event.type == "SOCIAL":
            user = event.data.get("profile_name", "")
            in_scope = False
            if "github-org-member" in event.tags:
                is_org = False
            elif "github-org" in event.tags:
                # this tag is set by the ORG_STUB branch below, after validation
                is_org = True
                in_scope = True
            else:
                # untagged profile: ask the API whether it is an org and whether it is ours
                is_org, in_scope = await self.validate_org(user)

            # find repos from user/org (SOCIAL --> CODE_REPOSITORY)
            repos = []
            if is_org:
                if in_scope:
                    self.verbose(f"Searching for repos belonging to organization {user}")
                    repos = await self.query_org_repos(user)
                else:
                    self.verbose(f"Organization {user} does not appear to be in-scope")
            elif "github-org-member" in event.tags:
                self.verbose(f"Searching for repos belonging to user {user}")
                repos = await self.query_user_repos(user)
            for repo_url in repos:
                repo_event = self.make_event({"url": repo_url}, "CODE_REPOSITORY", source=event)
                # keep the repository at the same scope distance as the profile it came from
                repo_event.scope_distance = event.scope_distance
                self.emit_event(repo_event)

            # find members from org (SOCIAL --> SOCIAL)
            # NOTE(review): members are enumerated for any valid org, even when
            # in_scope is False -- confirm this is intended
            if is_org and self.include_members:
                self.verbose(f"Searching for any members belonging to {user}")
                org_members = await self.query_org_members(user)
                for member in org_members:
                    event_data = {"platform": "github", "profile_name": member, "url": f"https://github.com/{member}"}
                    member_event = self.make_event(event_data, "SOCIAL", tags="github-org-member", source=event)
                    self.emit_event(member_event)

        # find valid orgs from stub (ORG_STUB --> SOCIAL)
        elif event.type == "ORG_STUB":
            user = event.data
            self.verbose(f"Validating whether the organization {user} is within our scope...")
            is_org, in_scope = await self.validate_org(user)
            if not is_org or not in_scope:
                self.verbose(f"Unable to validate that {user} is in-scope, skipping...")
                return

            event_data = {"platform": "github", "profile_name": user, "url": f"https://github.com/{user}"}
            github_org_event = self.make_event(event_data, "SOCIAL", tags="github-org", source=event)
            github_org_event.scope_distance = event.scope_distance
            self.emit_event(github_org_event)

    async def _collect_paged_field(self, endpoint, field):
        """
        Page through a GitHub list endpoint and collect one field from every item.

        Args:
            endpoint (str): Path below ``self.base_url`` (already URL-quoted), e.g. "orgs/<name>/repos".
            field (str): Key to read from each JSON item.

        Returns:
            list: The non-empty values of ``field``, in API order. Collection stops
            at the first failed request, non-200 response, undecodable body or empty page.
        """
        results = []
        # "{page}" is left as a literal placeholder for api_page_iter to fill in
        url = f"{self.base_url}/{endpoint}?per_page=100&page=" + "{page}"
        agen = self.helpers.api_page_iter(url, headers=self.headers, json=False)
        try:
            async for r in agen:
                if r is None:
                    break
                status_code = getattr(r, "status_code", 0)
                if status_code in (403, 429):
                    self.warning(f"Github is rate-limiting us (HTTP status: {status_code})")
                    break
                if status_code != 200:
                    break
                try:
                    page = r.json()
                except Exception as e:
                    self.warning(f"Failed to decode JSON for {r.url} (HTTP status: {status_code}): {e}")
                    break
                if not page:
                    break
                for item in page:
                    value = item.get(field, "")
                    # skip items lacking the field instead of collecting empty strings
                    if value:
                        results.append(value)
        finally:
            # aclose() on an async generator returns an awaitable; it must be awaited to take effect
            await agen.aclose()
        return results

    async def query_org_repos(self, query):
        """Return the ``html_url`` of every repository listed for organization ``query``."""
        return await self._collect_paged_field(f"orgs/{self.helpers.quote(query)}/repos", "html_url")

    async def query_org_members(self, query):
        """Return the ``login`` of every member listed for organization ``query``."""
        return await self._collect_paged_field(f"orgs/{self.helpers.quote(query)}/members", "login")

    async def query_user_repos(self, query):
        """Return the ``html_url`` of every repository listed for user ``query``."""
        return await self._collect_paged_field(f"users/{self.helpers.quote(query)}/repos", "html_url")

    async def validate_org(self, org):
        """
        Check whether ``org`` is a GitHub organization and whether it appears to be in-scope.

        The organization is considered in-scope when any string value in its API
        record is a DNS name (local hostnames excluded), URL or email address
        that ``self.scan.in_scope`` accepts.

        Args:
            org (str): Organization name to look up.

        Returns:
            tuple: ``(is_org, in_scope)`` booleans. Both are False when the request
            fails, is rate-limited, or does not return HTTP 200.
        """
        is_org = False
        in_scope = False
        # quote the name like the other queries do; it may come from an arbitrary ORG_STUB
        url = f"{self.base_url}/orgs/{self.helpers.quote(org)}"
        r = await self.helpers.request(url, headers=self.headers)
        if r is None:
            return is_org, in_scope
        status_code = getattr(r, "status_code", 0)
        if status_code in (403, 429):
            self.warning(f"Github is rate-limiting us (HTTP status: {status_code})")
            return is_org, in_scope
        if status_code != 200:
            # an error payload says nothing about the org, so don't scope-check it
            return is_org, in_scope
        is_org = True
        try:
            org_data = r.json()
        except Exception as e:
            self.warning(f"Failed to decode JSON for {r.url} (HTTP status: {status_code}): {e}")
            return is_org, in_scope
        if not isinstance(org_data, dict):
            return is_org, in_scope
        for k, v in org_data.items():
            if (
                isinstance(v, str)
                and (
                    self.helpers.is_dns_name(v, include_local=False)
                    or self.helpers.is_url(v)
                    or self.helpers.is_email(v)
                )
                and self.scan.in_scope(v)
            ):
                self.verbose(f'Found in-scope key "{k}": "{v}" for {org}, it appears to be in-scope')
                in_scope = True
                break
        return is_org, in_scope
Loading

0 comments on commit e92c2fa

Please sign in to comment.