Skip to content

Commit e5c70c9

Browse files
committed
add custom parsing
1 parent 6c440cb commit e5c70c9

File tree

4 files changed

+40
-50
lines changed

4 files changed

+40
-50
lines changed

server/nlp/embeddings.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from server import redis_client
2020
from server.config import VECTOR_DIMENSION, RedisDocument
21+
from server.utils import custom_log
2122

2223
assert redis_client is not None
2324

@@ -35,7 +36,7 @@ def load_corpus(corpus: list[RedisDocument]):
3536
Raises:
3637
exception: if failed to load corpus into redis
3738
"""
38-
print("loading corpus...")
39+
custom_log("loading corpus...")
3940

4041
pipeline = redis_client.pipeline()
4142
for i, doc in enumerate(corpus, start=1):
@@ -45,7 +46,7 @@ def load_corpus(corpus: list[RedisDocument]):
4546

4647
if not all(res):
4748
raise Exception("failed to load some documents")
48-
print("successfully loaded all documents")
49+
custom_log("successfully loaded all documents")
4950

5051

5152
def compute_openai_embeddings(texts):
@@ -69,7 +70,7 @@ def compute_openai_embeddings(texts):
6970

7071
def compute_embeddings():
7172
"""Compute embeddings from redis documents."""
72-
print("computing embeddings...")
73+
custom_log("computing embeddings...")
7374

7475
# get keys, questions, content
7576
keys = sorted(redis_client.keys("documents:*")) # type: ignore
@@ -84,7 +85,7 @@ def compute_embeddings():
8485

8586
embeddings = compute_openai_embeddings(question_and_content)
8687

87-
print("successfully computed embeddings")
88+
custom_log("successfully computed embeddings")
8889
return embeddings
8990

9091

@@ -98,7 +99,7 @@ def load_embeddings(embeddings: list[list[float]]):
9899
Raises:
99100
exception: if failed to load embeddings into redis
100101
"""
101-
print("loading embeddings into redis...")
102+
custom_log("loading embeddings into redis...")
102103

103104
# load embeddings into redis
104105
pipeline = redis_client.pipeline()
@@ -110,7 +111,7 @@ def load_embeddings(embeddings: list[list[float]]):
110111
if not all(res):
111112
raise Exception("failed to load embeddings")
112113

113-
print("successfully loaded all embeddings")
114+
custom_log("successfully loaded all embeddings")
114115

115116

116117
def create_index(corpus_len: int):
@@ -125,7 +126,7 @@ def create_index(corpus_len: int):
125126
Raises:
126127
exception: if failed to create index
127128
"""
128-
print("creating index...")
129+
custom_log("creating index...")
129130

130131
schema = (
131132
TextField("$.source", no_stem=True, as_name="source"),
@@ -157,7 +158,7 @@ def create_index(corpus_len: int):
157158
info = redis_client.ft("idx:documents_vss").info()
158159
num_docs = info["num_docs"]
159160
indexing_failures = info["hash_indexing_failures"]
160-
print("num_docs", num_docs, "indexing_failures", indexing_failures)
161+
custom_log("num_docs", num_docs, "indexing_failures", indexing_failures)
161162
return
162163
if time.time() - start >= 60:
163164
raise Exception("time out")
@@ -191,7 +192,7 @@ def queries(query, queries: list[str]) -> list[dict]:
191192
Returns:
192193
list of dictionaries containing query and result
193194
"""
194-
print("running queries...")
195+
custom_log("running queries...")
195196

196197
# encode queries
197198
encoded_queries = compute_openai_embeddings(queries)
@@ -221,7 +222,7 @@ def queries(query, queries: list[str]) -> list[dict]:
221222
)
222223
results_list.append({"query": queries[i], "result": query_result})
223224

224-
print("done running query")
225+
custom_log("done running query")
225226
return results_list
226227

227228

@@ -249,9 +250,9 @@ def embed_corpus(corpus: list[RedisDocument]):
249250
exception: if failed to load corpus
250251
"""
251252
# flush database
252-
print("cleaning database...")
253+
custom_log("cleaning database...")
253254
redis_client.flushdb()
254-
print("done cleaning database")
255+
custom_log("done cleaning database")
255256

256257
# embed corpus
257258
if not corpus:

server/nlp/responses.py

+9-38
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from server.config import OPENAI_API_KEY, OpenAIMessage, RedisDocument
1414
from server.nlp.embeddings import query_all
15+
from server.utils import custom_log
1516

1617
openai.api_key = OPENAI_API_KEY
1718

@@ -94,7 +95,13 @@ def openai_parse(email: str) -> list[str]:
9495
assert len(questions) > 0
9596
return questions
9697
except Exception as e:
97-
print(e)
98+
custom_log(
99+
"open ai parsed email as '",
100+
response.choices[0].message.content,
101+
"', resulting in error '",
102+
e,
103+
"'. returning entire email as a single question instead.",
104+
)
98105
return [email]
99106

100107

@@ -107,7 +114,7 @@ def confidence_metric(confidences: list[float]) -> float:
107114
Returns:
108115
confidence metric
109116
"""
110-
print("confidences", confidences)
117+
custom_log("confidences", confidences)
111118
return np.min(np.array(confidences))
112119

113120

@@ -171,39 +178,3 @@ def generate_response(
171178
thread.append({"role": "user", "content": email})
172179
thread += contexts
173180
return openai_response(thread, sender), docs, confidence
174-
175-
176-
# def test():
177-
# thread = []
178-
# new_email = "Where is the hackathon held? When is the application deadline? \
179-
# When is HackMIT happening?"
180-
# response, docs, confidence = generate_response(new_email)
181-
182-
# for question in docs.keys():
183-
# print("question", question)
184-
# for doc in docs[question]:
185-
# print("confidence:", doc["score"])
186-
# print(f"Q: {doc['question']}")
187-
# print(f"A: {doc['content']}")
188-
# print()
189-
# print(response)
190-
# print("confidence:", confidence)
191-
192-
# thread.append({"role": "user", "content": new_email})
193-
# thread.append({"role": "assistant", "content": response})
194-
195-
# new_email = "Thank you for your response! Is there anything else I should know \
196-
# before heading to the event? Thanks!"
197-
# response, docs, confidence = generate_response(new_email, thread)
198-
199-
# print("thread", thread)
200-
201-
# for question in docs.keys():
202-
# print("question", question)
203-
# for doc in docs[question]:
204-
# print("confidence:", doc["score"])
205-
# print(f"Q: {doc['question']}")
206-
# print(f"A: {doc['content']}")
207-
# print()
208-
# print(response)
209-
# print("confidence:", confidence)

server/utils.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Utils for server functions."""

import inspect

# ANSI escape sequences used to colour the caller prefix in terminal output.
BLUE = "\033[34m"
RESET = "\033[0m"


def custom_log(*args):
    """Print ``args`` prefixed with the caller's file name and function name.

    The prefix has the form ``<file>/<function>`` wrapped in the ``BLUE`` and
    ``RESET`` escape sequences, followed by the positional arguments exactly
    as ``print`` would render them (space separated).

    Args:
        *args: values forwarded unchanged to ``print`` after the prefix.
    """
    # Only the immediate caller is needed, so step back one frame instead of
    # materialising the whole stack with ``inspect.getouterframes``.
    frame = inspect.currentframe()
    caller = frame.f_back if frame is not None else None
    try:
        if caller is None:
            # ``inspect.currentframe`` may return None on interpreters without
            # frame support; still emit the message rather than crash.
            file_name = function_name = "<unknown>"
        else:
            file_name = caller.f_code.co_filename
            function_name = caller.f_code.co_name
    finally:
        # Drop frame references promptly to avoid reference cycles.
        del frame, caller
    print(f"{BLUE}{file_name}/{function_name}{RESET}", *args)

server_tests/utils.py

+3
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,7 @@ def seed_database(db: ProperlyTypedSQLAlchemy):
4848
)
4949
emails.append(test_email)
5050
db.session.add(test_email)
51+
52+
db.session.commit()
53+
thread.last_email = emails[-1].id
5154
db.session.commit()

0 commit comments

Comments
 (0)