forked from IldarS2000/university_parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtools.py
58 lines (42 loc) · 1.29 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import random
import time
import requests
from selenium import webdriver
import constants
# from proxy import PROXY
# Lower and upper bounds, in seconds, of a random retry delay. In the visible
# code they are referenced only by the commented-out get_html_confidently,
# which passes them to random.uniform() before sleeping.
A_BORDER_SEC = 10
B_BORDER_SEC = 100
def get_random_header():
    """Return one randomly selected entry from ``constants.headers``."""
    candidates = constants.headers
    return random.choice(candidates)
def get_html(url):
    """Fetch ``url`` with ``requests`` and return the response body as text.

    The value returned by ``get_random_header()`` is sent as the
    ``User-Agent`` request header.

    Args:
        url: Address of the page to download.

    Returns:
        The response body (``response.text``), or ``None`` if the request
        failed; a notice is printed to stdout in that case.
    """
    # get_html_with_engine uses this same value as Firefox's user-agent
    # override, so it belongs under the 'User-Agent' header; a header named
    # 'https' is not a user agent and leaves requests' default one in place.
    request_headers = {'User-Agent': get_random_header()}
    try:
        response = requests.get(url, headers=request_headers)
    except Exception:
        # Best-effort fetch: any request failure is reported and mapped to
        # None. Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate so a long scraping run can still be stopped.
        print(f'{url} did not respond...')
        return None
    return response.text
def get_html_with_engine(url):
    """Load ``url`` in a Selenium-driven Firefox and return the page source.

    A Firefox profile is created whose ``general.useragent.override``
    preference is set to the value returned by ``get_random_header()``; the
    driver executable is taken from ``constants.gecko_path``.

    Args:
        url: Address of the page to open.

    Returns:
        The page source reported by the driver, or ``None`` if loading the
        page failed; a notice is printed to stdout in that case.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference('general.useragent.override', get_random_header())
    driver = webdriver.Firefox(profile, executable_path=constants.gecko_path)
    try:
        driver.get(url)
        return driver.page_source
    except Exception:
        # Best-effort fetch: report the failure and signal it with None, but
        # let KeyboardInterrupt / SystemExit propagate (a bare ``except``
        # would swallow them).
        print(f'{url} did not respond...')
        return None
    finally:
        # Always shut the browser down; returning early on failure without
        # quitting would leave a browser session behind for every bad URL.
        driver.quit()
# def get_html_confidently(url):
# while True:
# try:
# response = requests.get(url, headers=get_random_header(), proxies=PROXY.get_next_proxy())
# return response.text
# except:
# print(f'{url} did not respond...')
# time.sleep(random.uniform(A_BORDER_SEC, B_BORDER_SEC))
def clear_console():
    """Clear the terminal by running the platform's clear-screen command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise.
    """
    if os.name == 'nt':
        command = 'cls'
    else:
        command = 'clear'
    os.system(command)
def print_progress(a, b):
    """Print a progress line of the form ``"<a> of <b>"`` to stdout.

    Args:
        a: Value shown before "of" (e.g. the number of items done so far).
        b: Value shown after "of" (e.g. the total number of items).
    """
    progress_line = f'{a} of {b}'
    print(progress_line)