diff --git a/README.md b/README.md
index 62f58542..5cd3f9d7 100755
--- a/README.md
+++ b/README.md
@@ -65,33 +65,29 @@ the following steps:
 8. After all URLs are processed, return the most relevant page.
 
 ### Features
-1. Crawls Tor links (.onion).(Partially Completed)
-2. Returns Page title and address with a short description about the site.(Partially Completed)
-3. Save links to database.(Not Started)
-4. Get emails from site.(Completed)
-5. Save crawl info to file.(Completed)
-6. Crawl custom domains.(Completed)
-7. Check if the link is live.(Complete)
-4. Built-in Updater.(Completed)
+1. Crawls Tor links (.onion) only.
+2. Returns Page title and address.
+3. Cache links so that there won't be duplicate links.
 ...(will be updated)
 
 ## Contribute
 Contributions to this project are always welcome.
-To add a new feature fork the dev branch and give a pull request when your new feature is tested and complete.
+To add a new feature fork this repository and give a pull request when your new feature is tested and complete.
 If its a new module, it should be put inside the modules directory and imported to the main file.
 The branch name should be your new feature name in the format . For example, Feature_FasterCrawl_1.0.
 Contributor name will be updated to the below list. :D
 
 ## Dependencies
 1. Tor
-2. Python 3.x (Make sure pip3 is installed)
+2. Python 3.x (Make sure pip3 is there)
 3. Python Stem Module
 4. urllib
 5. Beautiful Soup 4
 6. Socket
 7. Sock
 8. Argparse
-9. Git
+9. Stem module
+10. Git
 
 ## Basic setup
 Before you run the torBot make sure the following things are done properly:
@@ -109,23 +105,17 @@ with Controller.from_port(port = 9051) as controller:
 controller.signal(Signal.NEWNYM)`
 
 `python3 torBot.py`
-`usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION]
-                  [-l] [-i]
+`usage: torBot.py [-h] [-q] [-u URL] [-m] [-e EXTENSION] [-l]
 
 optional arguments:
   -h, --help            show this help message and exit
-  -v, --version         Show current version of TorBot.
-  --update              Update TorBot to the latest stable version
   -q, --quiet
   -u URL, --url URL     Specifiy a website link to crawl
-  -s, --save            Save results in a file
   -m, --mail            Get e-mail addresses from the crawled sites
   -e EXTENSION, --extension EXTENSION
                         Specifiy additional website extensions to the list(.com or .org etc)
-  -l, --live            Check if websites are live or not (slow)
-  -i, --info            Info displays basic info of the scanned site (very
-                        slow)`
+  -l, --live            Check if websites are live or not (slow)`
 
 Read more about torrc here : [Torrc](https://github.com/DedSecInside/TorBoT/blob/master/Tor.md)
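For context on the Basic setup section touched above: the torrc snippet the README references amounts to a short Stem handshake. A minimal, self-contained sketch — assuming Tor is running with `ControlPort 9051` and a `HashedControlPassword` in torrc; `"my_password"` is a placeholder, not a value from this repository:

```python
# Minimal sketch of the controller handshake the README's setup describes.
# Assumes torrc contains "ControlPort 9051" and a HashedControlPassword
# generated with `tor --hash-password my_password`.
from stem import Signal
from stem.control import Controller

with Controller.from_port(port=9051) as controller:
    controller.authenticate(password="my_password")  # plaintext password, not the hash
    controller.signal(Signal.NEWNYM)                 # request a fresh Tor circuit
```

Note that Stem expects the plaintext password here; the hash belongs only in torrc.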
diff --git a/modules/getweblinks.py b/modules/getweblinks.py
index 1a2f23ad..ce65b5f4 100644
--- a/modules/getweblinks.py
+++ b/modules/getweblinks.py
@@ -14,13 +14,11 @@ def link_status(web,out_queue,index):
     link_live = False
-    live_links = 0
     out_queue[index] = web + " is_live = False "
     try:
         urllib.request.urlopen(web)
         link_live = True
         out_queue[index] = web + " is_live = True "
-        live_links += 1
         print(web)
     except urllib.error.HTTPError as e:
         print(Bcolors.On_Red+web+Bcolors.ENDC)
@@ -76,8 +74,6 @@ def getLinks(soup,ext,live=0,save=0):
                 print(web)
         if save:
             saveJson("Onion-Links",websites)
-        return websites
-        #print (Bcolors.OKGREEN+'Live Links:'+live_links+' Dead Links:'+(str(len(websites))-live_links))
-        #print ('-------------------------------')
+        return websites
     else:
         raise('Method parameter is not of instance bs4.BeautifulSoup')
diff --git a/modules/info.py b/modules/info.py
deleted file mode 100644
index eebc371a..00000000
--- a/modules/info.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import requests
-from urllib.parse import urlsplit
-from termcolor.termcolor import colored, cprint
-
-__all__ = ['info']
-
-
-class info():
-    def __init__(self):
-        pass
-
-    def scan(self,target):
-        execute_all_func(target)
-
-    def execute_all_func(self, target):
-        try:
-            self.get_robots_txt(target)
-        except:
-            cprint("No robots.txt file Found!", "blue")
-        try:
-            self.get_dot_git(target)
-        except:
-            cprint("Error !", "red")
-        try:
-            self.get_dot_svn(target)
-        except:
-            cprint("Error", "red")
-        try:
-            self.get_dot_htaccess(target)
-        except:
-            cprint("Error", "red")
-
-    def get_robots_txt(self, target):
-        cprint("[*]Checking for Robots.txt", 'yellow')
-        url = target
-        target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
-        req = requests.get(target+"/robots.txt")
-        r = req.text
-        cprint(r, 'blue')
-
-    def get_dot_git(self, target):
-        cprint("[*]Checking for .git folder", 'yellow')
-        url = target
-        target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
-        req = requests.get(target+"/.git/")
-        r = req.status_code
-        if r == 200:
-            cprint("Alert!", 'red')
-            cprint(".git folder exposed publicly", 'red')
-        else:
-            print("NO .git folder found", 'blue')
-
-    def get_dot_svn(self, target):
-        cprint("[*]Checking for .svn folder", 'yellow')
-        url = target
-        target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
-        req = requests.get(target+"/.svn/entries")
-        r = req.status_code
-        if r == 200:
-            cprint("Alert!", 'red')
-            cprint(".SVN folder exposed publicly", 'red')
-        else:
-            cprint("NO .SVN folder found", 'blue')
-
-    def get_dot_htaccess(self, target):
-        cprint("[*]Checking for .htaccess", 'yellow')
-        url = target
-        target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
-        req = requests.get(target+"/.htaccess")
-        r = req.text
-        statcode = req.status_code
-        if statcode == 403:
-            cprint("403 Forbidden", 'blue')
-        elif statcode == 200:
-            cprint("Alert!!", 'blue')
-            cprint(".htaccess file found!", 'blue')
-        else:
-            cprint("Status code", 'blue')
-            cprint(statcode, 'blue')
\ No newline at end of file
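The deleted modules/info.py bundled several exposure checks (robots.txt, .git, .svn, .htaccess). For reference, a cleaned-up standalone sketch of the .git check it performed — `check_dot_git` is a hypothetical helper, not TorBot code; it fixes the unbound `execute_all_func` call and adds error handling the original lacked:

```python
# Standalone sketch of the .git exposure check from the deleted modules/info.py.
# Hypothetical helper; requires the `requests` package.
import requests
from urllib.parse import urlsplit

def check_dot_git(target: str) -> bool:
    """Return True if <scheme>://<host>/.git/ answers with HTTP 200."""
    base = "{0.scheme}://{0.netloc}".format(urlsplit(target))
    try:
        resp = requests.get(base + "/.git/", timeout=10)
    except requests.RequestException:
        return False  # unreachable host: treat as "not exposed"
    return resp.status_code == 200

if __name__ == "__main__":
    print(check_dot_git("http://example.com"))
```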
diff --git a/modules/updater.py b/modules/updater.py
index 2554e249..d3cdb214 100644
--- a/modules/updater.py
+++ b/modules/updater.py
@@ -4,14 +4,14 @@ def updateTor():
     print("Checking for latest stable release")
-    isGit = subprocess.Popen(["git","branch"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-    output = isGit.stdout.read()
+    isGit = subprocess.Popen("git branch",stdout=subprocess.PIPE,shell=True)
+    (output,err)=isGit.communicate()
     branch = output[2:8].decode("utf-8")
-    if branch == '* master':
+    if branch == 'master':
         update = subprocess.Popen(["git","pull","origin","master"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
         update_out = update.stdout.read()
-        if update_out[90:109].decode("utf-8") == 'Already up-to-date.' :
-            print("TorBot is already up-to-date.")
+        if update_out[85:104].decode("utf-8") == 'Already up-to-date.' :
+            print("TorBot Already up-to-date.")
         else:
             print("TorBot has succesfully updated to latest stable version.")
     else:
@@ -19,8 +19,8 @@ def updateTor():
         subprocess.Popen(["git","remote","add","origin","https://github.com/DedSecInside/TorBoT.git"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
         update = subprocess.Popen(["git","pull","origin","master"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
         update_out = update.stdout.read()
-        if update_out[90:109].decode("utf-8") == 'Already up-to-date.' :
-            print("TorBot is already up-to-date.")
+        if update_out[85:104].decode("utf-8") == 'Already up-to-date.' :
+            print("TorBot Already up-to-date.")
         else:
             print("TorBot has succesfully updated to latest stable version.")
         return 0
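A note on the hunk above: both the old and new versions detect "Already up-to-date." by slicing the raw `git pull` output at hard-coded byte offsets (`update_out[90:109]` before, `update_out[85:104]` after), which breaks whenever git's wording, locale, or the remote name shifts by a character. An offset-free alternative sketch — not the committed code; it assumes `git` is on PATH and the working directory is the TorBot checkout:

```python
# Hypothetical, offset-free rewrite of updateTor(); a sketch, not the patch's code.
# Assumes `git` is on PATH and the current directory is the repository.
import subprocess

def update_torbot():
    print("Checking for latest stable release")
    result = subprocess.run(["git", "pull", "origin", "master"],
                            capture_output=True, text=True)
    # Older git prints "Already up-to-date.", newer git "Already up to date."
    if "Already up" in result.stdout:
        print("TorBot is already up-to-date.")
    elif result.returncode == 0:
        print("TorBot has successfully updated to the latest stable version.")
    else:
        print("Update failed:", result.stderr.strip())
```

Matching on the output substring (or better, comparing `git rev-parse HEAD` before and after) survives formatting changes that the fixed-offset slice does not.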
"LICENSE: GNU Public License" + Bcolors.ENDC) print() - - def main(): - parser = argparse.ArgumentParser() - parser.add_argument("-v","--version",action="store_true",help="Show current version of TorBot.") - parser.add_argument("--update",action="store_true",help="Update TorBot to the latest stable version") - parser.add_argument("-q","--quiet",action="store_true") - parser.add_argument("-u","--url",help="Specifiy a website link to crawl") - parser.add_argument("-s","--save",action="store_true", help="Save results in a file") - parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites") - parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)") - parser.add_argument("-l","--live",action="store_true",help="Check if websites are live or not (slow)") - parser.add_argument("-i","--info",action="store_true",help="Info displays basic info of the scanned site (very slow)") - - args = parser.parse_args() - if args.version : - print("TorBot Version:"+_VERSION_) - exit() - if args.update: - updateTor() - exit() - - if args.quiet == 0: - header() - print ("Tor Ip Address :") - link = args.url - ext = 0 - live = 0 - save=0 - live = args.live - ext = args.extension - save = args.save - a = readPage("https://check.torproject.org/",1) - if link: - b = readPage(link) - else: - b = readPage("http://torlinkbgs6aabns.onion/",0) - if args.mail: - getMails(b,save) - if args.info: - inf = info() - scan(link) - getLinks(b,ext,live,save) - print ("\n\n") - return 0 + parser = argparse.ArgumentParser() + parser.add_argument("-v","--version",action="store_true",help="Show current version of TorBot.") + parser.add_argument("--update",action="store_true",help="Update TorBot to the latest stable version") + parser.add_argument("-q","--quiet",action="store_true") + parser.add_argument("-u","--url",help="Specifiy a website link to crawl") + parser.add_argument("-s","--save",action="store_true", help="Save results in a file") + parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites") + parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)") + parser.add_argument("-l","--live",action="store_true",help="Check if websites are live or not (slow)") + args = parser.parse_args() + if args.version : + print("TorBot Version:"+_VERSION_) + exit() + if args.update: + updateTor() + exit() + + if args.quiet == 0: + header() + print ("Tor Ip Address :") + link = args.url + ext = 0 + live = 0 + save=0 + live = args.live + ext = args.extension + save = args.save + a = readPage("https://check.torproject.org/",1) + if link: + b = readPage(link) + else: + b = readPage("http://torlinkbgs6aabns.onion/") + if args.mail: + getMails(b,save) + getLinks(b,ext,live,save) + print ("\n\n") + return 0 if __name__ == '__main__':