-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoursera_courses.py
29 lines (24 loc) · 1.03 KB
/
coursera_courses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Scrape each Coursera browse category page and write the labelled links found
# on it to "<category>.txt", one "label,.url" record per line.

# Base URL of the browse pages; a category slug from `links` is appended to it.
website = 'https://www.coursera.org/browse/'

# Category slugs to fetch. Each slug is also the stem of its output file name.
links = [
    'arts-and-humanities', 'business', 'computer-science',
    'data-science', 'information-technology', 'health',
    'math-and-logic', 'personal-development', 'physical-science-and-engineering',
    'social-sciences', 'language-learning',
]

# aria-label values that are excluded from the output (presumably site
# navigation anchors rather than course entries -- confirm against the page).
_SKIPPED_LABELS = (
    'Coursera',
    'Browse',
    'Search',
    'For Enterprise. See information about Coursera for Business',
)

for x in links:
    link = website + x

    # Download before opening the output file, so a failed request raises
    # without truncating a file left by an earlier run. The context manager
    # closes the response even if read() raises.
    with urlopen(link) as raw:
        data = raw.read()

    # Parse once, naming the parser explicitly: results stay the same across
    # machines and bs4 does not warn about an unspecified parser.
    soup = BeautifulSoup(data, 'html.parser')

    # UTF-8 is set explicitly so labels with non-ASCII characters are written
    # regardless of the platform's default encoding.
    with open(x + '.txt', 'w', encoding='utf-8') as f:
        for linker in soup.find_all('a'):
            name = linker.get('aria-label')
            if name is None or name in _SKIPPED_LABELS:
                continue

            href = linker.get('href')
            if href is None:
                # A labelled anchor without a target cannot form a record;
                # report it and keep going, as the script has always done.
                print('[!] Check for ' + x)
                continue

            try:
                f.write(name + ',.' + 'https://www.coursera.org' + href + '\n')
            except (OSError, UnicodeError):
                # Best effort: one bad record must not abort the category.
                print('[!] Check for ' + x)

    # Progress marker, printed once the category's file has been closed.
    print(x + '\n')