-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathscraper.py
60 lines (49 loc) · 1.78 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python
# -*- coding: utf-8 -*-
####################################################################################
#
# Basically the same idea as Passcracker.py, only this generates a baseword list.
# Reads the last 1 KB of password.txt and chooses a random word to search for.
#
####################################################################################
import json
import urllib2
import re
import sys
import random
import subprocess
import unicodedata
import time
def unique(words):
    """Return the distinct usable words from *words*, UTF-8 encoded.

    A word is kept when its length is between 4 and 15 characters
    (inclusive) and it has not been seen earlier in *words*.  The order of
    first appearance is preserved.

    words -- iterable of strings.
    Returns a list with each kept word passed through ``encode('utf-8')``.
    """
    # Track the *raw* words that were already accepted.  Comparing the raw
    # word against the encoded output list (as was done before) is both
    # O(n^2) and fragile whenever encoding changes the value's type.
    seen = set()
    result = []
    for word in words:
        if word in seen or not 3 < len(word) < 16:
            continue
        seen.add(word)
        result.append(word.encode('utf-8'))
    return result
def twitter(query):
    """Search Twitter for *query* and return the words found in the results.

    The JSON search endpoint is fetched, and for every entry in the
    response's ``results`` list the ``text`` field is cleaned up:
    URLs are blanked out, every non-word character (and ``_``) is replaced
    by a space, the text is NFKD-normalized and reduced to ASCII, and the
    remainder is split on whitespace.

    query -- search term; it is percent-encoded before being put in the URL.
    Returns a flat list of ASCII tokens (may contain empty strings and
    duplicates; callers are expected to filter them).
    Raises whatever ``urllib2.urlopen`` / ``json.loads`` raise, and
    ``KeyError`` if the response has no ``results`` key.
    """
    # NOTE(review): this unauthenticated search.twitter.com endpoint is a
    # legacy API -- confirm it is still reachable before relying on it.
    import urllib  # local import: only needed to percent-encode the query

    # Compile the patterns once instead of re-resolving them for every tweet.
    url_pattern = re.compile(r"""((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|(([^\s()<>]+|(([^\s()<>]+)))*))+(?:(([^\s()<>]+|(([^\s()<>]+)))*)|[^\s'!()[]{};:'".,<>?«»""'']))""")
    special_pattern = re.compile(r"""[^\w]|_""")
    whitespace_pattern = re.compile(r"""\s+""")

    # Percent-encode the query so spaces, '&', '#', ... cannot corrupt the URL.
    response = urllib2.urlopen("http://search.twitter.com/search.json?q=%s&rpp=1000" % urllib.quote_plus(query))
    try:
        data = json.loads(response.read())
    finally:
        # Always release the connection, even if reading/parsing fails.
        response.close()

    words = []
    for tweet in data['results']:
        without_urls = url_pattern.sub(' ', tweet['text'])
        ascii_text = unicodedata.normalize('NFKD', special_pattern.sub(' ', without_urls)).encode('ascii', 'ignore')
        words.extend(whitespace_pattern.split(ascii_text))
    return words
def randomword():
    """Return a random non-empty line from the tail of ``password.txt``.

    Only the last 1024 bytes of the file (or the whole file when it is
    smaller) are read.  Lines are stripped of surrounding whitespace and
    blank lines are ignored.

    Raises ``IOError`` if the file cannot be opened and ``IndexError`` if
    the tail contains no non-empty line.
    """
    # The context manager closes the handle; this function is called in an
    # endless loop, so leaking one descriptor per call adds up.
    with open('password.txt', "r") as handle:
        handle.seek(0, 2)
        size = handle.tell()
        offset = max(size - 1024, 0)
        handle.seek(offset, 0)
        lines = handle.readlines()

    # When we jumped into the middle of the file, the first line read is
    # most likely the cut-off end of a longer line -- skip it so that a
    # word fragment is never returned (unless it is all we have).
    if offset > 0 and len(lines) > 1:
        lines = lines[1:]

    candidates = [line.strip() for line in lines if line.strip()]
    return random.choice(candidates)
if __name__ == "__main__":
    # Endless harvesting loop: pick a seed word from password.txt, search
    # for it, and append every new usable word back to password.txt.
    # Ctrl-C (or a SystemExit) ends the loop with a short goodbye message.
    try:
        while True:
            try:
                word = randomword()
                print("Searching for: %s" % word)
                wordlist = unique(twitter(word))
                # Open the output file once per batch and let the context
                # manager close it, instead of leaking one handle per word.
                with open("password.txt", "a") as outfile:
                    for entry in wordlist:
                        outfile.write("%s\n" % entry)
            except Exception as exc:
                # Best effort: a failed search must not stop the scraper,
                # but report it instead of swallowing it silently.
                sys.stderr.write("Skipping after error: %r\n" % (exc,))
            # NOTE(review): the original indentation was lost; the pause is
            # assumed to be once per search.  It now also runs after a
            # failure so a persistent error cannot turn into a busy loop.
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        sys.stderr.write("Bai Bai\n")
        sys.exit(0)