-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebchange.py
139 lines (115 loc) · 4.6 KB
/
webchange.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#
# @file <webchange.py>
#
# @author Fernando Mendiburu - <[email protected]>
#
import os
import time
import requests
import difflib
from selenium import webdriver
# Number of seconds the main loop sleeps between two fetches of the page.
updateTime = 300 # 5 minutes
# Base directory of the tool inside the user's home directory; InitDriver()
# expects the chromedriver binary to live below it.
path = os.path.expanduser("~") + "/WebChangeFinder/"
#-------------------------------------------------------------------------------------------
#---------------------------------------Functions-------------------------------------------
#-------------------------------------------------------------------------------------------
def InitDriver():
    """Create and return a headless Chrome WebDriver.

    The chromedriver executable is taken from
    ``chromedriver_linux64/chromedriver`` below the module-level ``path``
    directory. The browser runs headless with a 1200x600 window.
    """
    print('Init driver, headless mode...')
    chrome_options = webdriver.ChromeOptions()
    for argument in ("--headless", "window-size=1200,600"):
        chrome_options.add_argument(argument)
    return webdriver.Chrome(
        executable_path=path + 'chromedriver_linux64/chromedriver',
        chrome_options=chrome_options,
    )
def FixAddr(addr):
    """Normalise a user-typed address into a full URL.

    An existing ``http://`` or ``https://`` scheme is kept; when there is
    none, ``https://`` is used. ``www.`` is inserted right after the scheme
    when the remainder of the address does not contain it.

    The scheme is split off *before* looking for ``www.`` so that an input
    such as ``https://google.com`` becomes ``https://www.google.com`` rather
    than the unusable ``www.https://google.com``.

    Args:
        addr: Address as typed by the user, e.g. ``google.com``.

    Returns:
        The normalised URL as a string.
    """
    addr = addr.strip()
    scheme = "https://"
    rest = addr
    for known in ("https://", "http://"):
        # Compare case-insensitively but keep the scheme exactly as typed.
        if addr.lower().startswith(known):
            scheme = addr[:len(known)]
            rest = addr[len(known):]
            break
    # Same containment check as before, so addresses that already carry a
    # "www." anywhere (e.g. "sub.www.example.com") are left untouched.
    if "www." not in rest:
        rest = "www." + rest
    return scheme + rest
def LoadPage(driver,addr):
    """Announce *addr* on the console and navigate *driver* to it."""
    message = 'Access %s...' % addr
    print(message)
    driver.get(addr)
def WriteFile(name_file,ResponseContent):
    """Write *ResponseContent* to *name_file*, replacing previous content.

    Args:
        name_file: Path of the file to (over)write.
        ResponseContent: Payload to store, either text or raw bytes.

    The file is opened in binary mode for ``bytes`` payloads, because a
    text-mode file rejects ``bytes`` on Python 3. On Python 2 ``bytes`` is
    ``str``, so plain strings also take the binary path there.
    """
    mode = 'wb+' if isinstance(ResponseContent, bytes) else 'w+'
    # The context manager closes the handle even if write() raises.
    with open(name_file, mode) as handle:
        handle.write(ResponseContent)
def GetAndSaveResponsePage(file,addr,timeout=30):
    """Download *addr* and store the response body in *file*.

    Args:
        file: Path of the file that receives the response body.
        addr: URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on an unresponsive
            host, which would stall the monitoring loop. Pass ``None`` to
            wait indefinitely.

    Returns:
        The ``requests`` response object of the GET request.

    Raises:
        requests.exceptions.RequestException: if the request fails or times
            out.
    """
    print("Request page...")
    response = requests.get(addr, timeout=timeout)
    WriteFile(file,response.content)
    print("Page saved!")
    return response
def Prompt(question):
    """Ask a yes/no *question* on the console until the answer is valid.

    The answer is lower-cased and stripped; only its first character is
    inspected, so "y", "Yes" and "yeah" all count as yes. Any other answer
    (including an empty one) asks again.

    Args:
        question: Text shown to the user; " (y/n): " is appended.

    Returns:
        True for an answer starting with "y", False for one starting
        with "n".
    """
    # raw_input() only exists on Python 2; on Python 3 input() has the same
    # behaviour. Resolve it here so the function works on both.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    while True:
        reply = str(read_line(question+' (y/n): ')).lower().strip()
        if reply[:1] == 'y':
            return True
        if reply[:1] == 'n':
            return False
def PrintChanges(response_outdated,response_updated):
    """Optionally print the lines that differ between two responses.

    The user is asked first; on "no" nothing is printed. Otherwise the
    bodies of both responses are compared line by line and only the removed
    ("- ") and added ("+ ") lines are shown, framed by blank output.
    """
    if not Prompt("Do you want to print the changes?"):
        return
    old_lines = response_outdated.content.splitlines()
    new_lines = response_updated.content.splitlines()
    print("\n")
    for entry in difflib.ndiff(old_lines, new_lines):
        # Skip unchanged ("  ") and hint ("? ") lines of the ndiff output.
        if entry.startswith(('- ', '+ ')):
            print(entry.replace('\n', ''))
    print("\n")
def TakeScreenshot(driver):
    """Ask the user and, on "yes", save the page shown by *driver*.

    The image is written to "Screenshot.png" (relative path).
    """
    wants_screenshot = Prompt("Do you want to take a screenshot?")
    if not wants_screenshot:
        return
    driver.save_screenshot("Screenshot.png")
def isMonitoring():
    """Ask whether monitoring should go on; return True for yes, else False."""
    answer = Prompt("Do you want to continue monitoring the page?\n If (n) the program will end.")
    return True if answer else False
def isValidWebpage(driver):
    """Tell whether the page currently loaded in *driver* looks reachable.

    The page is treated as invalid only when it contains the element
    ``//*[@id='main-message']/h1/span`` and that element's text contains
    "This site" (presumably the heading of Chrome's built-in error page -
    confirm against the Chrome version in use).

    Args:
        driver: Selenium WebDriver with the page already loaded.

    Returns:
        False when the error heading is found, True otherwise, including
        when the element is missing or the lookup fails.
    """
    try:
        # find_element("xpath", ...) exists in Selenium 3 and 4, whereas
        # find_element_by_xpath() was removed in Selenium 4.3. With the old
        # call the AttributeError would be swallowed below and every page
        # would be reported as valid.
        element = driver.find_element("xpath", "//*[@id='main-message']/h1/span")
        return 'This site' not in element.text
    except Exception:
        # Best effort: a missing element means no error page is shown.
        # Unlike a bare "except:", this lets KeyboardInterrupt/SystemExit
        # propagate.
        return True
def Credits():
    """Print the program banner (name, author and year) to the console."""
    rule = '-----------------------------------------------------------'
    banner = (
        rule,
        'webchange.py: The bot for detecting changes in webpages.',
        'Fernando Mendiburu - 2020',
        rule,
    )
    for text in banner:
        print(text)
#-------------------------------------------------------------------------------------------
#------------------------------------------Main---------------------------------------------
#-------------------------------------------------------------------------------------------
def main():
    """Run the interactive monitor.

    Steps:
      1. Start the browser and ask for an address until the loaded page
         passes isValidWebpage().
      2. Fetch a baseline copy of the page.
      3. Every ``updateTime`` seconds fetch the page again and compare the
         body with the baseline. On a change, offer to print the diff, ask
         whether to keep monitoring, and offer a screenshot.

    The browser session is always shut down on exit, whether the loop ends
    normally, through an error, or through Ctrl+C.
    """
    # raw_input() only exists on Python 2; input() is its Python 3 equivalent.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    # Stays None if InitDriver() fails, so the cleanup below knows there is
    # nothing to shut down.
    driver = None
    try:
        Credits()
        driver = InitDriver()
        CorrectAddr = False
        while not CorrectAddr:
            addr = read_line("Enter the webpage you want to monitor changes!\nExample https://www.google.com: ")
            addr = FixAddr(addr)
            LoadPage(driver,addr)
            CorrectAddr = isValidWebpage(driver)
        #--- get response ---
        # NOTE(review): the baseline is stored under the same file name as
        # the later snapshots and is overwritten by them - confirm whether
        # a separate file (e.g. "index_outdated.html") was intended.
        response_outdated = GetAndSaveResponsePage("index_updated.html",addr)
        while True:
            print("Wait %d seconds to the updated page..." % int(updateTime))
            time.sleep(updateTime)
            #--- get response ---
            response_updated = GetAndSaveResponsePage("index_updated.html",addr)
            if response_outdated.content == response_updated.content:
                print("Webpage without changes!")
                continue
            print("Changes in the webpage!")
            PrintChanges(response_outdated,response_updated)
            if not isMonitoring():
                break
            # Reload the browser first so the screenshot shows the changed
            # page rather than the copy loaded before the change.
            LoadPage(driver,addr)
            TakeScreenshot(driver)
            response_outdated = response_updated
    except KeyboardInterrupt:
        print("Interrupted by user.")
    except Exception as error:
        # Top-level boundary: report what went wrong instead of hiding it.
        print("Exception!")
        print("%s: %s" % (type(error).__name__, error))
    finally:
        print("Close Driver\nEnd program!")
        if driver is not None:
            # quit() ends the whole WebDriver session (browser and
            # chromedriver process); close() only closes the current window.
            driver.quit()


if __name__ == '__main__':
    main()