-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinks_scraper.py
45 lines (35 loc) · 1.27 KB
/
links_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
'''
Scrapes sites from sitemap list, signals finish with email logging handler
'''
import pickle
import logging
from logging.handlers import SMTPHandler
from samssimplescraper import Scraper
from config_example import config
# Configure the root logger so that records from this script, and from any
# library logger that propagates to the root, share the handlers below.
logger = logging.getLogger()
# The root logger defaults to WARNING, which drops INFO records before any
# handler sees them. Lower it so the INFO-level file handler actually
# receives INFO records.
logger.setLevel(logging.INFO)

log_format = logging.Formatter(
    fmt='%(asctime)s - %(name)s - %(levelname)s- %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Local log file, truncated on every run (mode='w'). FileHandler opens the
# file immediately, so the ./logs directory must already exist.
local_handler = logging.FileHandler(
    filename='./logs/links_scraper.log',
    mode='w',
)
local_handler.setFormatter(log_format)
local_handler.setLevel(logging.INFO)
logger.addHandler(local_handler)

# Email notification handler. SMTPHandler sends one message per record it
# accepts, so it is kept at WARNING: with the root logger now at INFO, an
# INFO-level email handler would send a mail for every INFO record.
email_handler = SMTPHandler(
    mailhost=config['mailhost'],
    fromaddr=config['fromaddr'],
    toaddrs=config['toaddrs'],
    subject='Instance 1 scraper done.',
    credentials=config['credentials'],
    secure=(),  # empty tuple: use TLS without a key file or certificate file
)
email_handler.setFormatter(log_format)
email_handler.setLevel(logging.WARNING)
logger.addHandler(email_handler)
ROOT_URL = "https://www.winemag.com/buying-guide/"

try:
    # NOTE: pickle can execute arbitrary code while loading; only load link
    # lists that were produced by this project.
    with open('./data/pickled_lists/total_links_list.pkl', 'rb') as fpick:
        links = pickle.load(fpick)

    # NOTE(review): only the first five links are scraped; this looks like a
    # debugging limit or a per-instance share of the list. Confirm.
    scraper = Scraper(link_list=links[0:5], root_url=ROOT_URL, folders=True)
    scraper.get_html()
except Exception:
    # The script runs unattended: record the traceback through the logging
    # handlers before the process dies, then re-raise so the failure is not
    # hidden.
    logger.exception('Scraper on instance 1 failed!')
    raise

# Logged at WARNING so the record clears the logger and handler thresholds
# and goes out as the completion notification.
logger.warning('Scraper on instance 1 is finished!')