-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.py
121 lines (97 loc) · 6.03 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Author(s): Blake McBride ([email protected])
Created: 12/06/2023
Overview: This file defines the backend server functionality for Historian
"""
# import standard modules
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
from flask import request
from flask_cors import CORS
from flask import jsonify
from flask_caching import Cache
# import src modules
from src.webScraping.webScraper import WebScraper
from src.graphing.hierarchicalClustering import HierarchicalClustering
# Build the Dash application and keep a handle on the Flask server underneath it.
app = dash.Dash("Historian")
server = app.server

# Filesystem-backed cache used to keep the dendrogram around if a fetch fails.
cache = Cache(
    server,
    config={
        'CACHE_TYPE': 'filesystem',
        'CACHE_DIR': 'cache-directory',
    },
)
# NOTE(review): the Cache constructor above already received the server;
# confirm whether this second init_app call is still required.
cache.init_app(server)

# Only the Historian browser extension is allowed to make cross-origin requests.
extension_id = "efgnmahmglilhimdpjgjedjieapjkjeh"
CORS(
    server,
    resources={r"/*": {"origins": "chrome-extension://" + extension_id}},
)

# Initial page: an empty graph plus a hidden div used as a callback input.
app.layout = html.Div(
    children=[
        dcc.Graph(id='graph'),
        html.Div(id="hidden-div", style={'display': 'none'}),
    ]
)
# @app.callback(
# Output('graph', 'figure'),
# [Input('graph', 'clickData')]
# )
# def display_click_data(clickData):
# if clickData:
# point_url = clickData['points'][0]['customdata']
# webbrowser.open_new_tab(point_url)
# return dendro
# horizontal rule printed around each progress section of the server log
_LOG_RULE = "——————————————————————————————————————————————————————————————————————————————————————————————————————————————"


def _log_section_start(title):
    """Print a section title framed by a rule above and below it."""
    print(_LOG_RULE)
    print(title)
    print(_LOG_RULE)


def _log_section_done():
    """Print the 'DONE' marker followed by a closing rule and a blank line."""
    print("\nDONE")
    print(_LOG_RULE + "\n")


# create server route for clients to send data to the backend
@app.server.route('/receive_data', methods=['POST'])
def receive_data():
    """Receive a list of URLs, cluster their page text and publish the dendrogram.

    Expects a JSON object body with an optional ``urls`` key holding a list.
    The URLs are scraped with ``WebScraper``, the resulting documents are
    passed to ``HierarchicalClustering.main``, and the returned figure is
    stored in the cache under ``'dendrogram'`` and placed in ``app.layout``.

    Returns:
        A ``(response, status)`` pair: 200 with a confirmation message on
        success, or 400 with an explanatory message when the request body is
        not a JSON object or ``urls`` is not a list.
    """
    # silent=True yields None for a missing/malformed body instead of raising,
    # so bad input is reported as a clean 400 rather than crashing on .get()
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify(message='Request body must be a JSON object'), 400
    urls = payload.get('urls', [])
    if not isinstance(urls, list):
        return jsonify(message="'urls' must be a list"), 400

    # echo the received urls to the server log
    _log_section_start("RECEIVED URLS FROM CLIENT")
    for url in urls:
        print(" ", url)
    print(_LOG_RULE + "\n")

    # scrape the text data at the webpages
    _log_section_start("EXTRACTING WEBPAGE TEXT")
    webscraper = WebScraper()
    docs = webscraper.scrapeWebpages(urls)
    _log_section_done()

    # preprocess the webpage documents and perform agglomerative hierarchical clustering
    # then visualize the results in a Dendrogram figure and display it for the user to see
    _log_section_start("PROCESSING WEBPAGE DATA")
    hc = HierarchicalClustering()
    dendro = hc.main(docs)
    _log_section_done()

    # store the figure in cache in case fetch fails
    cache.set('dendrogram', dendro)

    # update the figure on the frontend; 'hidden-div' is kept in the layout
    # because the registered Dash callback still names it as its Input
    app.layout = html.Div([
        dcc.Graph(
            id="graph",
            figure=dendro
        ),
        html.Div(id="hidden-div", style={'display': 'none'})
    ])
    return jsonify(message='Dendrogram generated'), 200
@app.callback(
    Output('graph', 'figure'),
    [Input('hidden-div', 'children')]
)
def load_cached_dendrogram(_children):
    """Supply the cached dendrogram to the 'graph' component.

    The callback decorator needs its own function that accepts the single
    Input value; stacking it on the zero-argument Flask view below would fail
    as soon as Dash invoked it.

    NOTE(review): serving the cached figure here is presumed to be the intent
    of this callback (the cache is described as a fallback for a failed
    fetch) -- confirm against the frontend.

    Args:
        _children: current ``children`` of the hidden div; unused, it only
            serves as the trigger for the callback.

    Returns:
        The object stored under ``'dendrogram'`` in the cache, or
        ``dash.no_update`` when nothing has been cached so the existing
        figure is left untouched.
    """
    dendro = cache.get('dendrogram')
    if dendro is None:
        return dash.no_update
    return dendro


# create server route to check figure availability on the server side
@app.server.route('/check_dendrogram', methods=['GET'])
def check_dendrogram():
    """Report as JSON whether a dendrogram is currently stored in the cache.

    Returns:
        A JSON response ``{"available": true}`` when the cache holds a value
        under ``'dendrogram'``, otherwise ``{"available": false}``.
    """
    dendro = cache.get('dendrogram')
    return jsonify(available=dendro is not None)
if __name__ == "__main__":
    # executed as a script: begin serving the app with debug mode switched off
    app.run_server(debug=False)