-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
63 lines (49 loc) · 1.62 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import mechanicalsoup
# Scrape the most recent notices from the institute landing page and print,
# for each one: its publishing date, title, notice type and a short brief.

# Landing page that lists the recent notices.
url = "https://cit.ac.in/"
# Number of most recent notices to report.
MAX_NOTICES = 5
# Placeholders printed when the page does not provide the expected element.
UNKNOWN_DATE = "(date unavailable)"
UNTITLED = "(untitled)"


def _text_or(tag, default):
    """Return ``tag.text``, or *default* when the lookup found no tag."""
    return default if tag is None else tag.text


browser = mechanicalsoup.StatefulBrowser(
    soup_config={'features': 'lxml'},  # Use the lxml HTML parser
    raise_on_404=True,
    user_agent='MyBot/0.1: mysite.example.com/bot_info',
)
try:
    browser.open(url)
    index_page = browser.get_current_page()

    # Store the most recent notice urls (at most MAX_NOTICES) in a list.
    notice_box = index_page.find("div", class_="noticeContent")
    if notice_box is None:
        # Without the container there is nothing to report; exit with a
        # readable message (the finally clause still closes the browser).
        raise SystemExit("No notice list found on " + url)
    anchors = notice_box.find_all('a', href=True)[:MAX_NOTICES]
    # Resolve hrefs while the index page is still the current page, so that
    # relative links stay valid after the browser navigates away.
    link_list = [browser.absolute_url(anchor['href']) for anchor in anchors]

    # Publishing dates, paired with the links by position on the page.
    dates = index_page.find_all("span", class_="block text-xs text-grey-dark")
    publish_dates = [d.text for d in dates[:len(link_list)]]

    # Print date, title, notice type and brief for every notice.
    for index, link in enumerate(link_list):
        print()

        # date -- the page may expose fewer date spans than notice links
        if index < len(publish_dates):
            print(publish_dates[index])
        else:
            print(UNKNOWN_DATE)

        browser.open(link)
        page = browser.get_current_page()
        if page is None:
            # get_current_page() yields None when the response was not
            # parsed as HTML (e.g. the notice is a direct file download),
            # so there is no markup to read a title or brief from.
            print(link)
            print("NOTICE")
            continue

        # title
        title = _text_or(
            page.find("span", class_="block text-grey-darkest text-lg"),
            UNTITLED,
        )
        print(title)

        # notice_type -- defaults to "NOTICE" when the page has no <h1>
        print(_text_or(page.find("h1"), "NOTICE"))

        # brief
        brief_block = page.find(id="notices")
        spans = brief_block.find_all("span") if brief_block is not None else []
        if len(spans) > 1:
            brief = title
        else:
            # Look for a span whose class list is exactly ["block"].
            # .get() avoids a KeyError on spans that carry no class at all;
            # fall back to the title when no such span exists.
            block_span = next(
                (s for s in spans if s.get("class") == ["block"]),
                None,
            )
            brief = _text_or(block_span, title)
        print("Brief: ", brief)
finally:
    # Always release the underlying HTTP session, even if scraping failed.
    browser.close()