Skip to content
This repository has been archived by the owner on Sep 2, 2019. It is now read-only.

Commit

Permalink
Merge branch 'wget_errno_13'
Browse files Browse the repository at this point in the history
  • Loading branch information
leunammejii committed Dec 17, 2018
2 parents 2397162 + 62d2f62 commit 4104c9d
Show file tree
Hide file tree
Showing 3 changed files with 294 additions and 153 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ The following command will:
- Recursively download the site when an open directory is found hosting a file with a particular extension

Optional arguments:
- **--quiet** : Don't show wget output
- **--timeout** : Set time to wait for a connection
- **--tor** : Download files via the Tor network
- **--verbose** : Show error messages
```bash
python opendir_certstream.py [--timeout] [--tor]
python opendir_certstream.py [--quiet] [--timeout] [--tor] [--verbose]
```
**Note**: Any URLs in the queue will be lost once the program stops.

Expand All @@ -51,10 +53,12 @@ The following command will:
Optional arguments:
- **--dryrun** : Perform a test run to see what would be downloaded
- **--exclude** : A comma-separated list of domains to not download content from (ex. 'google.com,bing.com')
- **--quiet** : Don't show wget output
- **--timeout** : Set time to wait for a connection
- **--tor** : Download files via the Tor network
- **--verbose** : Show error messages
```bash
python opendir_urlscan.py <QUERY_TYPE> <DELTA> <FILE_EXTENSION> [--dry-run] [--exclude=CSV] [--timeout] [--tor]
python opendir_urlscan.py <QUERY_TYPE> <DELTA> <FILE_EXTENSION> [--dry-run] [--exclude=CSV] [--quiet] [--timeout] [--tor] [--verbose]
```
**Note**: If the path is a file, it will be downloaded regardless of whether it's an open directory.

Expand Down
183 changes: 135 additions & 48 deletions opendir_certstream.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/opt/splunk/bin/python
#!/usr/bin/python
"""
Description:
- Stream CT logs via Certstream
Expand All @@ -7,8 +7,10 @@
- Recursively download the site when an open directory is found hosting a file with a particular extension
Optional arguments:
- --quiet : Don't show wget output
- --timeout : Set time to wait for a connection
- --tor : Download files via the Tor network
- --verbose : Show error messages
Credit: https://github.com/x0rz/phishing_catcher
Expand Down Expand Up @@ -51,6 +53,11 @@

# Parse Arguments
parser = argparse.ArgumentParser(description="Attempt to detect phishing kits and open directories via Certstream.")
parser.add_argument("--quiet",
dest="quiet",
action="store_true",
required=False,
help="Don't show wget output")
parser.add_argument("--timeout",
dest="timeout",
type=int,
Expand All @@ -62,6 +69,11 @@
action="store_true",
required=False,
help="Download files over the Tor network")
parser.add_argument("--verbose",
dest="verbose",
action="store_true",
required=False,
help="Show error messages")
args = parser.parse_args()

# hxxp://sebastiandahlgren[.]se/2014/06/27/running-a-method-as-a-background-thread-in-python/
Expand Down Expand Up @@ -90,7 +102,8 @@ def run(self):
continue

while not url_queue.empty():
url = url_queue.get()
url = url_queue.get()

tqdm.tqdm.write(
"[*] Session : "
"{}".format(colored(url, "blue"))
Expand All @@ -102,6 +115,20 @@ def run(self):
timeout=timeout,
allow_redirects=True)
except Exception as err:
if args.verbose:
tqdm.tqdm.write(
"[!] Error : "
"{}".format(
colored(err, "red", attrs=["bold", "underline"])
)
)

tqdm.tqdm.write(
"[!] Failed : "
"{}".format(
colored(url, "red", attrs=["underline"])
)
)
continue

if not (resp.status_code == 200 and "Index of " in resp.content):
Expand All @@ -120,37 +147,37 @@ def run(self):
tqdm.tqdm.write(
"[*] Download : "
"{} ('Index of ' found)".format(
colored(url, "green")
colored(url, "green", attrs=["bold"])
)
)

try:
subprocess.call([
"{}".format(torsocks),
"wget",
"--quiet",
"--execute=robots=off",
"--tries=2",
"--no-clobber",
"--timeout={}".format(timeout),
"--waitretry=0",
"--directory-prefix=./{}/".format(directory),
"--content-disposition",
"--recursive",
"--level=0",
"--no-parent",
url
])
wget_command = format_wget(timeout, directory, uagent, url)

subprocess.call(wget_command)

tqdm.tqdm.write(
"[*] Complete : "
"{}".format(
colored(url, "green", attrs=["underline", "bold"]))
colored(url, "green", attrs=["bold", "underline"])
)
)
break
except Exception as err:
print("[!] Error : {}".format(
colored(err, "red", attrs=["bold"])
))
if args.verbose:
tqdm.tqdm.write(
"[!] Error : "
"{}".format(
colored(err, "red", attrs=["bold", "underline"])
)
)

tqdm.tqdm.write(
"[!] Failed : "
"{}".format(
colored(url, "red", attrs=["underline"])
)
)
continue
time.sleep(self.interval)

Expand Down Expand Up @@ -217,9 +244,16 @@ def score_domain(domain):
if res is not None:
domain = '.'.join([res.subdomain, res.domain])
except Exception as err:
print("[!] Error : {}".format(
colored(err, "red", attrs=["bold"])
))
if args.verbose:
tqdm.tqdm.write(
"[!] Error : "
"{}".format(colored(err, "red", attrs=["bold", "underline"]))
)

tqdm.tqdm.write(
"[!] Failed : "
"{}".format(colored(domain, "red", attrs=["underline"]))
)
pass

score += int(round(entropy.shannon_entropy(domain)*50))
Expand Down Expand Up @@ -252,13 +286,18 @@ def score_domain(domain):

def main():
""" """
# Set variables for arguments
global uagent
uagent = "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko"
uagent = "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko"
global timeout
timeout = args.timeout
timeout = args.timeout
certstream_url = "wss://certstream.calidog.io"
global url_queue
url_queue = Queue.Queue()
url_queue = Queue.Queue()

global quiet
if args.quiet:
quiet = "--quiet"

# Print start messages
show_summary()
Expand All @@ -274,24 +313,28 @@ def main():

if external["override_suspicious.yaml"] is True:
suspicious = external
else:
if external["keywords"] is not None:
suspicious["keywords"].update(external["keywords"])

if external["tlds"] is not None:
suspicious["tlds"].update(external["tlds"])
for key in external.keys():
if external[key] is None:
external_error(key, "external")

if external["archives"] is not None:
suspicious["archives"] = external["archives"]
else:
print(colored("At least one extension is required for 'archives'.", "red", attrs=["bold"]))
exit()
suspicious[key] = external[key]
else:
for key in external.keys():
if key == "override_suspicious.yaml" or key == "queries":
continue

if key == "keywords" or key == "tlds":
if external[key] is not None:
suspicious[key].update(external[key])
elif key == "archives" or key == "files":
if external[key] is not None:
suspicious[key] = external[key]
else:
external_error(key, "external")

if external["files"] is not None:
suspicious["files"] = external["files"]
else:
print(colored("At least one extension is required for 'files'.", "red", attrs=["bold"]))
exit()
if key not in suspicious.keys() or suspicious[key] is None:
external_error(key, "suspicious")

# Start queue and listen for events via Certstream
print(colored("Starting queue...\n", "yellow", attrs=["bold"]))
Expand All @@ -305,8 +348,10 @@ def show_summary():
"""Print summary of arguments selected"""

print("Summary:")
print(" quiet : {}".format(args.quiet))
print(" timeout : {}".format(args.timeout))
print(" tor : {}\n".format(args.tor))
print(" tor : {}".format(args.tor))
print(" verbose : {}\n".format(args.verbose))
return

def show_network(uagent, timeout):
Expand All @@ -323,7 +368,7 @@ def show_network(uagent, timeout):
else:
ip_type = "Original"
proxies = {}
torsocks = ""
torsocks = None

try:
global requested_ip
Expand All @@ -333,8 +378,13 @@ def show_network(uagent, timeout):
timeout=timeout,
allow_redirects=True).content
except Exception as err:
print("[!!] Error : {}".format(
colored(err, "red", attrs=["bold"])
if args.verbose:
print("[!] Error : {}".format(
colored(err, "red", attrs=["bold", "underline"])
))

print("[!] Failed : {}".format(
colored("Use --verbose to capture the error message", "red", attrs=["underline"])
))
exit()

Expand All @@ -346,5 +396,42 @@ def show_network(uagent, timeout):
print(colored("{} IP: {}\n".format(ip_type, requested_ip), "yellow", attrs=["bold"]))
return

def external_error(key, override):
    """Report a missing configuration key and terminate the program.

    Args:
        key: Name of the configuration key that has no value.
        override: Base name of the YAML file being checked; it is printed
            as "<override>.yaml" in the message.

    Raises:
        SystemExit: Always. The exit status is 1 so that shells and
            supervisors can tell the run failed.
    """
    # Local import keeps this block self-contained; sys.exit() is used
    # instead of the interactive-only exit() helper installed by `site`.
    import sys

    print(colored(
        "No {} found in {}.yaml ({}:).".format(key, override, key),
        "red",
        attrs=["bold"]
    ))
    sys.exit(1)

def format_wget(timeout, directory, uagent, url):
    """Return the wget command needed to download files.

    Args:
        timeout: Value placed in wget's --timeout option.
        directory: Directory name used for --directory-prefix
            (formatted as "./<directory>/").
        uagent: User-Agent string sent with the requests.
        url: URL to download; always the last element of the command.

    Returns:
        A list of arguments for subprocess.call(). The module-level
        ``torsocks`` value is placed first when it is not None, and
        "--quiet" is added when ``args.quiet`` is set.
    """
    wget_command = [
        "wget",
        "--execute=robots=off",
        "--tries=2",
        "--no-clobber",
        "--timeout={}".format(timeout),
        "--waitretry=0",
        "--directory-prefix=./{}/".format(directory),
        # The command is run as an argument list (no shell), so any quotes
        # around the value would be sent to the server literally.
        "--user-agent={}".format(uagent),
        "--content-disposition",
        "--recursive",
        "--level=0",
        "--no-parent"
    ]

    if torsocks is not None:
        wget_command.insert(0, torsocks)

    # Use the literal flag rather than the `quiet` global, which is only
    # bound when --quiet was passed on the command line.
    if args.quiet:
        wget_command.append("--quiet")

    wget_command.append(url)

    return wget_command

if __name__ == "__main__":
main()
Loading

0 comments on commit 4104c9d

Please sign in to comment.