-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipline.py
46 lines (31 loc) · 1.41 KB
/
pipline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import logging
import subprocess
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Identifiers of the news sites to process. Each pipeline stage iterates over
# this list; the uid is passed to the extract script and is used to build the
# per-site CSV filenames handed from one stage to the next.
news_site_uids = ['eluniversal', 'elpais']
def main():
    """Run the extract, transform and load stages, in that order."""
    for stage in (_extract, _transform, _load):
        stage()
def _extract():
    """Run the extract script for every site and move its output to ./transform.

    For each uid in ``news_site_uids`` this runs ``python3 main.py <uid>``
    inside ``./extract`` and then uses ``find`` to move every path under
    ``./extract`` whose name starts with the uid to
    ``../transform/<uid>_.csv``.

    Raises:
        subprocess.CalledProcessError: if the extract script or the ``find``
            command exits with a non-zero status.
    """
    logger.info('Starting extract process')
    for news_site_uid in news_site_uids:
        # check=True stops the pipeline on a failed extract instead of letting
        # the following steps run against missing or partial output.
        subprocess.run(
            ['python3', 'main.py', news_site_uid],
            cwd='./extract',
            check=True,
        )
        # The bare '{}' argument is find's placeholder for the matched path;
        # only the two strings with .format() are filled in by Python.
        # NOTE(review): if several paths match '<uid>*', each is moved to the
        # same destination, so only the last one survives.
        subprocess.run(
            ['find', '.', '-name', '{}*'.format(news_site_uid),
             '-exec', 'mv', '{}', '../transform/{}_.csv'.format(news_site_uid), ';'],
            cwd='./extract',
            check=True,
        )
def _transform():
    """Run the transform script on each site's raw CSV and pass the result to ./load.

    For each uid in ``news_site_uids`` this runs
    ``python3 main.py <uid>_.csv`` inside ``./transform``, removes that input
    file, and moves ``clean_<uid>_.csv`` (expected to have been written by the
    transform script) to ``../load/<uid>.csv``.

    Raises:
        subprocess.CalledProcessError: if any of the three commands exits with
            a non-zero status.
    """
    logger.info('Starting transform process')
    for news_site_uid in news_site_uids:
        dirty_data_filename = '{}_.csv'.format(news_site_uid)
        clean_data_filename = 'clean_{}'.format(dirty_data_filename)
        # check=True: when the transform script fails, raise before the raw
        # input is deleted so it is still available for a retry.
        subprocess.run(
            ['python3', 'main.py', dirty_data_filename],
            cwd='./transform',
            check=True,
        )
        subprocess.run(['rm', dirty_data_filename], cwd='./transform', check=True)
        subprocess.run(
            ['mv', clean_data_filename, '../load/{}.csv'.format(news_site_uid)],
            cwd='./transform',
            check=True,
        )
def _load():
    """Run the load script on each site's clean CSV, then delete the CSV.

    For each uid in ``news_site_uids`` this runs
    ``python3 main.py <uid>.csv`` inside ``./load`` and then removes
    ``<uid>.csv`` from that directory.

    Raises:
        subprocess.CalledProcessError: if the load script or ``rm`` exits with
            a non-zero status.
    """
    logger.info('Starting load process')
    for news_site_uid in news_site_uids:
        clean_data_filename = '{}.csv'.format(news_site_uid)
        # check=True: when the load script fails, raise before the CSV is
        # deleted so data that was never loaded is not lost.
        subprocess.run(
            ['python3', 'main.py', clean_data_filename],
            cwd='./load',
            check=True,
        )
        subprocess.run(['rm', clean_data_filename], cwd='./load', check=True)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()