forked from miker123/Python-Web-Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getAllChrExt.py
60 lines (51 loc) · 1.5 KB
/
getAllChrExt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python
#Written By Mike R
#August 1st, 2016
#This program does the following:
#Finds all Chrome extensions listed in the Chrome Web Store sitemap
#Writes the URLs from the sitemap to extensionURLS.txt
#Writes all of the extension IDs to a separate file, extendData.txt
import urllib2, urllib
# Shared module-level state.
# `sites` receives the unprocessed <loc> lines from the top-level sitemap;
# `extSites` receives the same entries after the tags and "amp;" are removed.
sites, extSites = [], []

# File handles used by the rest of the script. Order matters: the "w" open
# creates (or truncates) extensionURLS.txt, so the file exists by the time the
# same path is reopened for reading.
f = open("extensionURLS.txt", "w")
f2 = open("extensionURLS.txt", "r")
f3 = open("extendData.txt", "w")
def homepage():
    """Fetch the top-level Chrome Web Store sitemap and collect its <loc> lines.

    Every line of the response that contains ``<loc`` is appended, unmodified
    (tags and trailing newline included), to the module-level ``sites`` list.
    Returns None. Any error raised by ``urllib2.urlopen`` or while reading the
    response propagates to the caller.
    """
    # Named `response` rather than `file` so the Python 2 builtin is not shadowed.
    response = urllib2.urlopen('https://chrome.google.com/webstore/sitemap?shard=0&numshards=0')
    try:
        data = response.readlines()
    finally:
        # Release the connection even if reading fails part-way through.
        response.close()
    # Keep only the lines that carry a <loc> entry.
    sites.extend(l for l in data if "<loc" in l)
# Pipeline: fetch the top-level sitemap, save every URL it lists, then fetch
# each of those URLs and record the extension IDs found in them.


def _extension_id(loc_line):
    """Return the extension ID held in one sitemap <loc> line, or None.

    The <loc> tags and the web-store "detail/" URL prefix are removed and the
    remainder is split on "/"; the second piece is returned with surrounding
    whitespace stripped (presumably the remainder looks like "<name>/<id>" --
    verify against a live sitemap). None is returned when that piece is
    missing or empty, so a malformed entry can be skipped instead of raising
    IndexError.
    """
    entry = loc_line.replace("<loc>", "").replace("</loc>", "")
    # Remove the URL prefix so only the extension's own path remains.
    entry = entry.replace("https://chrome.google.com/webstore/detail/", "")
    pieces = entry.split("/")
    if len(pieces) < 2:
        return None
    return pieces[1].strip() or None


# Step 1: collect the <loc> lines of the top-level sitemap into `sites`.
homepage()

# Step 2: strip the tags, undo the "&amp;" escaping and save each URL.
try:
    for raw in sites:
        cleaned = raw.replace("<loc>", "").replace("</loc>", "")
        # Dropping "amp;" turns the XML-escaped "&amp;" back into "&".
        new_str = cleaned.replace('amp;', '')
        f.write(new_str)
        extSites.append(new_str)
finally:
    # Close (and flush) before the file is read back through f2 below.
    f.close()

# Step 3: fetch every saved URL and write one extension ID per line.
try:
    for line in f2:
        url = line.strip()
        if not url:
            continue
        # Only the network fetch is best-effort: a failing URL is reported and
        # skipped. Ctrl-C is no longer swallowed, and a malformed entry no
        # longer discards the rest of the sitemap it came from.
        try:
            response = urllib2.urlopen(url)
            try:
                data2 = response.readlines()
            finally:
                response.close()
        except Exception as err:
            print("Skipping %s: %s" % (url, err))
            continue
        for entry in data2:
            if "<loc" in entry:
                ext_id = _extension_id(entry)
                if ext_id:
                    # Explicit newline so IDs never run together.
                    f3.write(ext_id + "\n")
finally:
    f2.close()
    f3.close()