# pcrawler_service.py
from flask import Flask, jsonify, make_response, request, current_app
import json
from datetime import timedelta, datetime
from functools import update_wrapper
import subprocess
import os
# http://flask.pocoo.org/snippets/56/
def crossdomain(origin=None, methods=None, headers=None,
                max_age=21600, attach_to_all=True,
                automatic_options=True):
    """Build a decorator that adds CORS headers to a Flask view's responses.

    Args:
        origin: Allowed origin as a string (e.g. ``'*'``) or an iterable of
            origin strings, which are joined with ``', '``. When ``None``,
            no ``Access-Control-Allow-Origin`` header is emitted.
        methods: Optional HTTP methods to advertise, as a string or an
            iterable of method names (upper-cased and sorted). When
            ``None``, the value is taken from the ``Allow`` header of the
            application's default OPTIONS response.
        headers: Optional string, or iterable of header names, sent as
            ``Access-Control-Allow-Headers``.
        max_age: Value for ``Access-Control-Max-Age``; a number of seconds
            or a ``timedelta`` (converted to whole seconds).
        attach_to_all: When false, the CORS headers are attached to OPTIONS
            responses only.
        automatic_options: When true, OPTIONS requests are answered with the
            application's default OPTIONS response instead of calling the
            view.

    Returns:
        A decorator that wraps a view function and returns the wrapper.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere
    # so the isinstance checks below work on either interpreter.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if methods is not None:
        if isinstance(methods, string_types):
            # Iterating a plain string would split it into characters.
            methods = methods.upper()
        else:
            methods = ', '.join(sorted(x.upper() for x in methods))
    if headers is not None and not isinstance(headers, string_types):
        headers = ', '.join(x.upper() for x in headers)
    # The default origin is None, which cannot be joined.
    if origin is not None and not isinstance(origin, string_types):
        origin = ', '.join(origin)
    if isinstance(max_age, timedelta):
        # The header carries whole seconds; total_seconds() returns a float.
        max_age = int(max_age.total_seconds())

    def get_methods():
        if methods is not None:
            return methods
        options_resp = current_app.make_default_options_response()
        return options_resp.headers['allow']

    def decorator(f):
        def wrapped_function(*args, **kwargs):
            if automatic_options and request.method == 'OPTIONS':
                resp = current_app.make_default_options_response()
            else:
                resp = make_response(f(*args, **kwargs))
            if not attach_to_all and request.method != 'OPTIONS':
                return resp

            h = resp.headers
            if origin is not None:
                h['Access-Control-Allow-Origin'] = origin
            h['Access-Control-Allow-Methods'] = get_methods()
            h['Access-Control-Max-Age'] = str(max_age)
            if headers is not None:
                h['Access-Control-Allow-Headers'] = headers
            return resp

        # Tell Flask not to answer OPTIONS itself; the wrapper handles it.
        f.provide_automatic_options = False
        return update_wrapper(wrapped_function, f)
    return decorator
# Folder (relative path, with trailing slash) holding the JSON data files
# this service reads.
PDATA_FOLDER = "pdata/"
# File inside PDATA_FOLDER whose 'date' field records the last update.
PDATA_LASTUPDATE = "last_update_date.json"
# File inside PDATA_FOLDER with the complete data set returned to clients.
PDATA_COMPLETE = "complete.json"
def run_all_spiders():
    """Run the bash script which runs all the spiders and creates output files.

    Blocks until ``./run_all_spiders.sh`` has exited, then echoes the
    script's captured stdout and stderr to this process's stdout.
    """
    cmd = ['bash', './run_all_spiders.sh']
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # communicate() reads both pipes to EOF and waits for the process.
    out, err = p.communicate()
    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the Python-2-only print statement.
    print(out)
    print(err)
def last_update_was_today():
    """Report whether the recorded last-update date equals today's date.

    Reads the ``date`` field (formatted ``%d/%m/%Y``) from the last-update
    JSON file inside the data folder.

    Returns:
        True when the file exists and its date is today; False when the
        file is missing or holds a different date.
    """
    marker_path = PDATA_FOLDER + PDATA_LASTUPDATE
    if not os.path.isfile(marker_path):
        return False

    with open(marker_path) as marker_file:
        recorded = json.load(marker_file)

    recorded_day = datetime.strptime(recorded['date'], '%d/%m/%Y').date()
    return recorded_day == datetime.today().date()
# Settings handed to app.run() when this module is executed directly.
HOST = "localhost"
PORT = 5001
DEBUG = False
# Flask application object; views are registered on it via @app.route.
app = Flask(__name__)
@app.route('/passatempos', methods=['GET'])
@crossdomain(origin='*')
def passatempos():
    """Run the spiders and return the complete data file as a JSON response.

    Returns:
        The response produced by ``jsonify`` from the contents of the
        complete-data JSON file.

    Raises:
        ValueError: If the complete-data file holds an empty JSON value.
    """
    # NOTE(review): the once-per-day guard is disabled, so the spiders are
    # re-run on every request:
    #if not last_update_was_today():
    run_all_spiders()
    with open(PDATA_FOLDER + PDATA_COMPLETE) as complete_pdata_file:
        complete_pdata_dict = json.load(complete_pdata_file)
    # If JSON dict is empty
    if not complete_pdata_dict:
        # Presumably dropping the last-update marker forces a fresh crawl
        # next time -- TODO confirm. Only remove it when it exists, so a
        # missing marker cannot mask the ValueError below with an OSError.
        last_update_path = PDATA_FOLDER + PDATA_LASTUPDATE
        if os.path.isfile(last_update_path):
            os.remove(last_update_path)
        raise ValueError('JSON is empty!')
    return jsonify(**complete_pdata_dict)
if __name__ == '__main__':
    import logging

    # Send DEBUG-and-above log records to a file. The FileHandler behind
    # basicConfig() does not create missing parent directories, so make sure
    # the log folder exists before configuring logging.
    log_path = 'log/service.log'
    log_dir = os.path.dirname(log_path)
    if log_dir and not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    logging.basicConfig(filename=log_path, level=logging.DEBUG)
    app.run(host=HOST, port=PORT, debug=DEBUG)