-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmanage.py
118 lines (98 loc) · 4.29 KB
/
manage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import requests
import json
import time
import nltk
from datetime import date, timedelta
from apscheduler.scheduler import Scheduler
from apscheduler.events import EVENT_JOB_ERROR
import pandas as pd
from flask import render_template, request, redirect
from app import create_app
from config import config, Config
from app.urls import getArticleURLS
from app.article import getMonitorArticles, getAllArticles
from app.clean_text import normalizeText
from app.summary import frequencyDistributionTextSummarizer, lsaTextSummarizer, textRankTextSummarizer
from app.cluster import classifyArticles
# Build the Flask application from the 'development' entry of the config mapping.
app = create_app(config['development'])
# Site root and news index page; both are passed to getAllArticles through base_url.
main_url = 'https://www.monitor.co.ug'
news_url = 'https://www.monitor.co.ug/News/688324-688324-156c2gl/index.html'
# Browser-like User-Agent header handed to getAllArticles together with base_url.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
base_url = [main_url, news_url]
# Runs at import time. Presumably the tokenizer data is needed by the
# text-cleaning / summarising helpers -- TODO confirm.
nltk.download('punkt')
# NOTE(review): `urls` is not referenced anywhere else in the visible file.
urls = pd.DataFrame()
# Module-level frame: index() fills its columns, get_summary() reads rows from it.
data = pd.DataFrame()
# The scheduler is started as soon as this module is imported; cron_job is
# attached to it further down through its decorator.
sched = Scheduler(daemon=True)
sched.start()
# NOTE(review): both dates are computed once at import and never refreshed, so a
# long-running process keeps using the start-up date.
today = date.today()
yesterday = today - timedelta(days = 1)
@app.route('/', methods=['GET'])
def loader():
    """Serve the loader page after running the article-refresh check once.

    Returns:
        The rendered 'temp.html' template with the title "Loader".
    """
    # Run the same check the scheduler performs, synchronously, before rendering.
    cron_job()
    page = render_template('temp.html', title="Loader")
    return page
@app.route('/index', methods=['GET'])
def index():
    """Render the home page with one summary per article, ordered by cluster.

    Reads the article CSV, drops duplicate titles and incomplete rows,
    summarises every article with the TextRank summariser (3 passed as its
    second argument), stores the result in the module-level ``data`` frame and
    renders 'index.html' with the rows sorted by their 'Cluster' value.

    Returns:
        The rendered 'index.html' template with the title "Home".
    """
    # NOTE: the commented-out logic that used to sit here checked for a CSV
    # named after today's date, then yesterday's; the view is currently pinned
    # to one fixed snapshot file.
    posts = (
        pd.read_csv("./app/databases/posts-2019-05-23.csv")
        .drop_duplicates(subset='Titles', keep='first')
        .dropna()
    )

    cleaned = normalizeText(posts["Articles"])
    summaries = textRankTextSummarizer(cleaned, 3)

    def strip_site_suffix(title):
        return title.replace('- Daily Monitor', ' ')

    def strip_contact(article):
        return article.replace('[email protected]', ' ')

    # Columns are written onto the shared module-level frame (not a local one)
    # because get_summary() later looks rows up in it.
    data['Id'] = posts['Unnamed: 0']
    data['Titles'] = posts['Titles'].apply(strip_site_suffix)
    data['Articles'] = posts['Articles'].apply(strip_contact)
    data['Summaries'] = summaries
    data['Urls'] = posts['Urls']
    data['Images'] = posts['Images']
    data['Cluster'] = classifyArticles(data['Summaries'])

    ordered = data.sort_values(by='Cluster')
    return render_template('index.html', data=ordered, title="Home")
@app.route('/summarizer', methods=['POST','GET'])
def compare():
    """Summarise user-submitted text with the summariser selected in the form.

    GET renders 'summarizer.html' with no original text and no summary.
    POST reads the form fields ``model`` and ``post``, replaces every '?' in
    the post with a space, normalises it and runs the selected summariser
    (3 passed as its second argument).

    Returns:
        The rendered 'summarizer.html' page. For a POST whose ``model`` value
        is not one of the known keys, the page is rendered without a summary
        and the response carries HTTP status 400.
    """
    if request.method != 'POST':
        empty = {"Original": None, "Summary": None}
        return render_template('summarizer.html', posts=empty, title="Summarizer")

    model = request.form['model']
    post = request.form['post'].replace('?', ' ')

    # Keys are the literal values the view has always compared against
    # (including the 'summariseArtcle' spelling); presumably they mirror the
    # option values in the HTML form -- do not "correct" them here.
    summarizers = {
        'summariseArtcle': frequencyDistributionTextSummarizer,
        'lsaTextSummarizer': lsaTextSummarizer,
        'textRankSummarizer': textRankTextSummarizer,
    }
    summarize = summarizers.get(model)
    if summarize is None:
        # An unknown model used to leave the summary variable unbound and
        # crash with UnboundLocalError; answer with a client error instead.
        # "Summary": None is the same shape the GET branch already renders.
        rejected = {"Original": post, "Summary": None}
        return render_template('summarizer.html', posts=rejected, title="Summarizer"), 400

    summarized_text = summarize(normalizeText([post]), 3)
    posts = {"Original": post, "Summary": summarized_text}
    # Pass the title on POST as well, matching what the GET branch passes.
    return render_template('summarizer.html', posts=posts, title="Summarizer")
@app.route('/article/<int:id>', methods=['GET'])
def get_summary(id):
    """Render a single article looked up by its index label in ``data``.

    Args:
        id: Integer taken from the URL; used as a label for ``data.loc``.
            The name is fixed by the ``<int:id>`` route converter.

    Returns:
        A 302 redirect to '/index' when ``data`` has no rows yet, otherwise
        the rendered 'compare.html' template with the title "Article".

    Raises:
        A 404 HTTP error (via ``abort``) when ``id`` is not a label in
        ``data.index``.
    """
    # Imported here because nothing else in this module uses it.
    from flask import abort

    # data is only populated by index(); send the visitor there first.
    if len(data) < 1:
        return redirect('/index', code=302)

    # An unknown label used to raise KeyError from .loc and surface as a 500.
    if id not in data.index:
        abort(404)

    article = data.loc[id]
    return render_template('compare.html', data=article, title="Article")
@app.route('/about', methods=['GET'])
def about():
    """Serve the static "About" page.

    Returns:
        The rendered 'about.html' template with the title "About".
    """
    page = render_template('about.html', title="About")
    return page
@sched.interval_schedule(seconds=5)
def cron_job():
    """Fetch articles when no posts CSV exists for the current day.

    Scheduled every 5 seconds through the decorator, and also called directly
    by other functions in this module. If './app/databases/posts-<date>.csv'
    is missing for the current date, ``getAllArticles`` is called with the
    module's ``base_url`` and ``headers``. Presumably that call is what writes
    the file -- confirm against app.article.
    """
    # Evaluate the date on every run. The module-level `today` is computed once
    # at import, so a process that stays up past midnight would keep checking
    # for the start-up day's file and never fetch again.
    csv_path = './app/databases/posts-' + str(date.today()) + '.csv'
    if not os.path.isfile(csv_path):
        getAllArticles(base_url, headers)
def job_listener(event):
    """Re-run ``cron_job`` after a 3-second pause when an event carries an exception.

    Args:
        event: Object exposing an ``exception`` attribute; a falsy value means
            there is nothing to retry.

    NOTE(review): no call that registers this listener with the scheduler is
    visible in this file -- confirm whether it is attached elsewhere.
    """
    if not event.exception:
        return
    time.sleep(3)
    cron_job()
if __name__ == "__main__":
    # Start the Flask server only when this file is executed as a script.
    app.run()