-
Notifications
You must be signed in to change notification settings - Fork 0
/
db_ingest.py
40 lines (33 loc) · 1.35 KB
/
db_ingest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""
deep_classiflie_db: DB component of a system that analyzes Trump lies. This component scrapes a number of
sources and compiles/refreshes the primary project DB.
@author: Dan Dale
"""
from typing import MutableMapping, Tuple
from tweepy import API
from mysql.connector.pooling import MySQLConnectionPool
import envconfig
from db_cnx_mgr import DbCnxMgr
from scraping_classes.dcbot_tweet_scraper import DCBotTweetScraper
from scraping_classes.factbase_scraper import FactbaseScraper
from scraping_classes.wapo_scraper import WapoScraper
def get_cnxp_handle() -> MySQLConnectionPool:
    """Return the connection pool exposed by a newly constructed ``DbCnxMgr``.

    Returns:
        The ``cnxp`` attribute of a fresh ``DbCnxMgr`` instance.
    """
    # Building the manager sets up the DB config/connection; only its pool
    # attribute is handed back, the manager object itself is not retained.
    return DbCnxMgr().cnxp
def refresh_db(conf_file: MutableMapping = None, cnxp: MySQLConnectionPool = None, svc_conf: Tuple = None,
               api_handle: API = None, batch_infsvc: bool = False, nontwtr_update: bool = False) -> None:
    """Refresh the project DB by invoking the scrapers selected by the flags.

    Scraper selection:
      * ``batch_infsvc`` truthy: Factbase, Wapo and DCBot tweet scrapers.
      * otherwise, ``nontwtr_update`` truthy or ``config.db.tweetbot.enabled``
        falsy: Factbase and Wapo scrapers only.
      * otherwise: DCBot tweet scraper only.

    Args:
        conf_file: Forwarded to ``envconfig.EnvConfig`` as its first argument.
        cnxp: Connection pool handed to every scraper; when falsy, one is
            obtained from ``get_cnxp_handle()``.
        svc_conf: Forwarded to ``envconfig.EnvConfig`` as its second argument.
        api_handle: Forwarded to ``DCBotTweetScraper``.
        batch_infsvc: Select all three scrapers.
        nontwtr_update: Select only the non-Twitter (Factbase/Wapo) scrapers.
    """
    config = envconfig.EnvConfig(conf_file, svc_conf).config
    if not cnxp:
        cnxp = get_cnxp_handle()

    # Short-circuit order matters: the tweetbot setting is only consulted when
    # neither explicit flag has already selected the non-Twitter scrapers.
    scrape_nontwtr = batch_infsvc or nontwtr_update or not config.db.tweetbot.enabled
    # Tweets are scraped in batch mode, or when the non-Twitter path was not chosen.
    scrape_tweets = batch_infsvc or not scrape_nontwtr

    # The scraper objects are instantiated and discarded -- presumably their
    # constructors perform the scraping; confirm in scraping_classes.
    if scrape_nontwtr:
        FactbaseScraper(config, cnxp)
        WapoScraper(config, cnxp)
    if scrape_tweets:
        DCBotTweetScraper(config, cnxp, api_handle)
if __name__ == "__main__":
    # Script entry point: run a refresh with refresh_db's default arguments.
    refresh_db()