-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
199 lines (153 loc) · 5.87 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from apscheduler.schedulers.background import BackgroundScheduler
import atexit
import os
import time
import pickle
import pandas as pd
import sqlite3
from dotenv import load_dotenv
from flask import Flask, jsonify, request
from requests import Response
from datasource.datasource import get_data_source
from nlp.nlp import NaturalLanguageProcessor
from src.SO.answers import get_answers
from src.config_parameters.technologies import get_all_technologies
from src.details.aggregator import DetailsAggregator
from src.details.details import Details
from src.details.similarity_score_strategy import SimilarityScoreStrategy
from src.SO.update_dump import updateDump
from src.config_parameters.technologies import find_parameter
from train_model import train_model
from urllib import parse
# Path to the pre-trained model
MODEL_PATH = "./BD/model.pickle"
# Cassandra configuration-parameter list used when refreshing the question dump.
CASSANDRA_PARAMETER_FILE = "./src/config_parameters/cassandra/cassandra_parameters.txt"
# Pull environment variables (e.g. MODEL_UPDATE_INTERVAL_SECONDS, SCORE_THRESHOLD) from a .env file.
load_dotenv()
app = Flask(__name__)
class Dopamine:
    """Owns the pickled NLP model and refreshes it from the question dump on a schedule."""

    def __init__(self):
        self.load_model()

    def load_model(self):
        """(Re)load the pickled NaturalLanguageProcessor from MODEL_PATH into self.processor."""
        print("Loading model...")
        with open(MODEL_PATH, 'rb') as file:
            # NOTE(review): pickle executes arbitrary code on load — MODEL_PATH must
            # stay a trusted, locally produced artifact.
            self.processor: NaturalLanguageProcessor = pickle.load(file)
        print("Model loaded")

    def scheduledUpdate(self):
        """Pull dump data newer than the last recorded update; retrain and reload on change.

        Reads and writes the UpdateStamp table in BD/DOPAMine.db. When updateDump
        reports that new content arrived, retrains the model and reloads it into
        this instance.
        """
        input_path = "BD/QueryResults.csv"
        # Reuse the module-level constant instead of re-hardcoding the same path.
        param_file_path = CASSANDRA_PARAMETER_FILE
        current_time = int(time.time())
        con = sqlite3.connect("BD/DOPAMine.db")
        try:
            cur = con.cursor()
            # Timestamp of the most recent completed update; fall back to 0 (full
            # refresh) if the table is empty instead of crashing on fetchone() -> None.
            row = cur.execute(
                "SELECT * FROM UpdateStamp ORDER BY UpdateTime DESC LIMIT 1"
            ).fetchone()
            last_update = row[0] if row is not None else 0
            df_csv = pd.read_csv(input_path)
            updated = updateDump(last_update, current_time, df_csv, input_path, param_file_path)
            # Parameterized insert — never build SQL by string concatenation.
            cur.execute("INSERT INTO UpdateStamp VALUES(?);", (current_time,))
            con.commit()
        finally:
            # Always release the connection, even if the update raises.
            con.close()
        if updated:
            print("Scheduled model update in progress...")
            # Retrain on the refreshed CSV and overwrite the pickle at MODEL_PATH
            # (same file the former local constants pointed at).
            train_model(csv_path=input_path, output_path=MODEL_PATH)
            # Model is loaded into NLP object
            self.load_model()
            print("Model updated")
# Single shared model holder, built once at import time (stray semicolon removed).
dopamine = Dopamine()


def scheduledUpdate():
    """Module-level job target for the scheduler; delegates to the shared Dopamine instance."""
    dopamine.scheduledUpdate()


# Refresh the dump/model on a fixed interval taken from the environment.
# MODEL_UPDATE_INTERVAL_SECONDS must be set; int() rejects non-numeric values.
scheduler = BackgroundScheduler()
scheduler.add_job(func=scheduledUpdate, trigger="interval",
                  seconds=int(os.environ['MODEL_UPDATE_INTERVAL_SECONDS']))
scheduler.start()
# Shut down the scheduler when exiting the app
atexit.register(lambda: scheduler.shutdown())
@app.route("/")
def home():
    """Basic home route.

    Returns:
        str: "Hello, Flask!"
    """
    greeting = "Hello, Flask!"
    return greeting
@app.route("/answers/<question_id>", methods=['GET'])
def answers(question_id: int) -> Response:
    """Fetch the answers belonging to a single question.

    Args:
        question_id (int): Id of the question whose answers are fetched.

    Returns:
        Response: JSON payload with the fetched answers.
    """
    # NOTE(review): the Response annotation resolves to requests.Response here,
    # while jsonify returns a flask.Response — confirm which was intended.
    print(f"GET /answers/{question_id}")
    fetched = get_answers(question_id)
    return jsonify(fetched)
@app.route("/technologies", methods=['GET'])
def technologies() -> Response:
    """Fetch all available technologies to search from.

    Returns:
        Response: JSON list of technologies.
    """
    # Plain string: the original used an f-string with no placeholders.
    print("GET /technologies")
    # Renamed local so it no longer shadows the view function itself.
    available = get_all_technologies()
    return jsonify(available)
@app.route("/search", methods=['GET'])
def search():
    """Search for configuration parameters based on the user query.

    Query string:
        q: free-text search query (URL-encoded).
        t: technology name (URL-encoded); currently echoed back, not used to filter.

    Returns:
        Response: (TODO) Configuration parameters.
    """
    raw_query = request.args.get("q", default="", type=str)
    raw_technology = request.args.get("t", default="", type=str)
    print(f"GET /search?q={raw_query}&t={raw_technology}")
    query = parse.unquote(raw_query)
    technology = parse.unquote(raw_technology)

    # The model ranks stored questions by similarity to the query; raw cosine
    # scores are then rescaled into a display range.
    # NOTE(review): SCORE_THRESHOLD is passed as the raw env string — confirm
    # processor.search expects a string rather than a float.
    cosine_similarities, related_indexes = dopamine.processor.search(
        query, os.environ['SCORE_THRESHOLD'])
    normalized_scores = dopamine.processor.normalize_scores(
        cosine_similarities, 0, 0.8, 0, 0.9)

    questions = []
    for raw_index in related_indexes:
        idx = int(raw_index)
        record = dopamine.processor.data_dict[idx]
        questions.append({
            "answer_id": record["answer_id"],
            "link": record["link"],
            "parameters": record["parameters"],
            "question_body": record["question_body"],
            "question_id": record["question_id"],
            "question_title": record["question_title"],
            "response_body": record["response_body"],
            "similarity_score": normalized_scores[idx],
            "source_name": get_data_source(record["link"]),
            "tags": record["tags"],
        })

    # Group hits by parameter and condense every group into one details entry.
    aggregated_data = DetailsAggregator(questions, "parameters").aggregate()
    detail_fields = [
        'answer_id', 'link', 'question_body', 'question_id',
        'question_title', 'response_body', 'similarity_score', 'source_name', 'tags'
    ]
    details_list = []
    for parameter in aggregated_data:
        details = Details(
            aggregated_data.get(parameter),
            parameter,
            SimilarityScoreStrategy.HIGHEST,
            detail_fields,
        )
        details_list.append(details.to_json())

    # Answers are sent as a response
    return jsonify({
        "answers": details_list,
        "query": query,
        "technology": technology,
    })
if __name__ == "__main__":
    # NOTE(review): debug=True combined with host 0.0.0.0 exposes the Werkzeug
    # debugger to the whole network — disable debug (or bind to localhost)
    # anywhere outside local development.
    app.run(host="0.0.0.0", debug=True)