add custom parsing

azliu0 · azliu0 · commit e5c70c92dd04 · 2024-04-16T17:16:10.000Z
diff --git a/server/nlp/embeddings.py b/server/nlp/embeddings.py
@@ -18,6 +18,7 @@
 
 from server import redis_client
 from server.config import VECTOR_DIMENSION, RedisDocument
+from server.utils import custom_log
 
 assert redis_client is not None
 
@@ -35,7 +36,7 @@ def load_corpus(corpus: list[RedisDocument]):
     Raises:
         exception: if failed to load corpus into redis
     """
-    print("loading corpus...")
+    custom_log("loading corpus...")
 
     pipeline = redis_client.pipeline()
     for i, doc in enumerate(corpus, start=1):
@@ -45,7 +46,7 @@ def load_corpus(corpus: list[RedisDocument]):
 
     if not all(res):
         raise Exception("failed to load some documents")
-    print("successfully loaded all documents")
+    custom_log("successfully loaded all documents")
 
 
 def compute_openai_embeddings(texts):
@@ -69,7 +70,7 @@ def compute_openai_embeddings(texts):
 
 def compute_embeddings():
     """Compute embeddings from redis documents."""
-    print("computing embeddings...")
+    custom_log("computing embeddings...")
 
     # get keys, questions, content
     keys = sorted(redis_client.keys("documents:*"))  # type: ignore
@@ -84,7 +85,7 @@ def compute_embeddings():
 
     embeddings = compute_openai_embeddings(question_and_content)
 
-    print("successfully computed embeddings")
+    custom_log("successfully computed embeddings")
     return embeddings
 
 
@@ -98,7 +99,7 @@ def load_embeddings(embeddings: list[list[float]]):
     Raises:
         exception: if failed to load embeddings into redis
     """
-    print("loading embeddings into redis...")
+    custom_log("loading embeddings into redis...")
 
     # load embeddings into redis
     pipeline = redis_client.pipeline()
@@ -110,7 +111,7 @@ def load_embeddings(embeddings: list[list[float]]):
     if not all(res):
         raise Exception("failed to load embeddings")
 
-    print("successfully loaded all embeddings")
+    custom_log("successfully loaded all embeddings")
 
 
 def create_index(corpus_len: int):
@@ -125,7 +126,7 @@ def create_index(corpus_len: int):
     Raises:
         exception: if failed to create index
     """
-    print("creating index...")
+    custom_log("creating index...")
 
     schema = (
         TextField("$.source", no_stem=True, as_name="source"),
@@ -157,7 +158,7 @@ def create_index(corpus_len: int):
                 info = redis_client.ft("idx:documents_vss").info()
                 num_docs = info["num_docs"]
                 indexing_failures = info["hash_indexing_failures"]
-                print("num_docs", num_docs, "indexing_failures", indexing_failures)
+                custom_log("num_docs", num_docs, "indexing_failures", indexing_failures)
                 return
             if time.time() - start >= 60:
                 raise Exception("time out")
@@ -191,7 +192,7 @@ def queries(query, queries: list[str]) -> list[dict]:
     Returns:
         list of dictionaries containing query and result
     """
-    print("running queries...")
+    custom_log("running queries...")
 
     # encode queries
     encoded_queries = compute_openai_embeddings(queries)
@@ -221,7 +222,7 @@ def queries(query, queries: list[str]) -> list[dict]:
             )
         results_list.append({"query": queries[i], "result": query_result})
 
-    print("done running query")
+    custom_log("done running query")
     return results_list
 
 
@@ -249,9 +250,9 @@ def embed_corpus(corpus: list[RedisDocument]):
         exception: if failed to load corpus
     """
     # flush database
-    print("cleaning database...")
+    custom_log("cleaning database...")
     redis_client.flushdb()
-    print("done cleaning database")
+    custom_log("done cleaning database")
 
     # embed corpus
     if not corpus:
diff --git a/server/nlp/responses.py b/server/nlp/responses.py
@@ -12,6 +12,7 @@
 
 from server.config import OPENAI_API_KEY, OpenAIMessage, RedisDocument
 from server.nlp.embeddings import query_all
+from server.utils import custom_log
 
 openai.api_key = OPENAI_API_KEY
 
@@ -94,7 +95,13 @@ def openai_parse(email: str) -> list[str]:
         assert len(questions) > 0
         return questions
     except Exception as e:
-        print(e)
+        custom_log(
+            "open ai parsed email as '",
+            response.choices[0].message.content,
+            "', resulting in error '",
+            e,
+            "'. returning entire email as a single question instead.",
+        )
         return [email]
 
 
@@ -107,7 +114,7 @@ def confidence_metric(confidences: list[float]) -> float:
     Returns:
         confidence metric
     """
-    print("confidences", confidences)
+    custom_log("confidences", confidences)
     return np.min(np.array(confidences))
 
 
@@ -171,39 +178,3 @@ def generate_response(
     thread.append({"role": "user", "content": email})
     thread += contexts
     return openai_response(thread, sender), docs, confidence
-
-
-# def test():
-#     thread = []
-#     new_email = "Where is the hackathon held? When is the application deadline? \
-#                 When is HackMIT happening?"
-#     response, docs, confidence = generate_response(new_email)
-
-#     for question in docs.keys():
-#         print("question", question)
-#         for doc in docs[question]:
-#             print("confidence:", doc["score"])
-#             print(f"Q: {doc['question']}")
-#             print(f"A: {doc['content']}")
-#         print()
-#     print(response)
-#     print("confidence:", confidence)
-
-#     thread.append({"role": "user", "content": new_email})
-#     thread.append({"role": "assistant", "content": response})
-
-#     new_email = "Thank you for your response! Is there anything else I should know \
-#                 before heading to the event? Thanks!"
-#     response, docs, confidence = generate_response(new_email, thread)
-
-#     print("thread", thread)
-
-#     for question in docs.keys():
-#         print("question", question)
-#         for doc in docs[question]:
-#             print("confidence:", doc["score"])
-#             print(f"Q: {doc['question']}")
-#             print(f"A: {doc['content']}")
-#         print()
-#     print(response)
-#     print("confidence:", confidence)
diff --git a/server/utils.py b/server/utils.py
@@ -0,0 +1,15 @@
+"""Utils for server functions."""
+
+import inspect
+
+BLUE = "\033[34m"
+RESET = "\033[0m"
+
+
+def custom_log(*args):
+    """Prints some calling information along with logging info."""
+    frame = inspect.currentframe()
+    caller_frame = inspect.getouterframes(frame)[1]
+    file_name = caller_frame.filename
+    function_name = caller_frame.function
+    print(f"{BLUE}{file_name}/{function_name}{RESET}", *args)
diff --git a/server_tests/utils.py b/server_tests/utils.py
@@ -48,4 +48,7 @@ def seed_database(db: ProperlyTypedSQLAlchemy):
         )
         emails.append(test_email)
         db.session.add(test_email)
+
+    db.session.commit()
+    thread.last_email = emails[-1].id
     db.session.commit()

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -48,4 +48,7 @@ def seed_database(db: ProperlyTypedSQLAlchemy):` |
| `48` | `48` | `)` |
| `49` | `49` | `emails.append(test_email)` |
| `50` | `50` | `db.session.add(test_email)` |
|  | `51` | `+` |
|  | `52` | `+ db.session.commit()` |
|  | `53` | `+ thread.last_email = emails[-1].id` |
| `51` | `54` | `db.session.commit()` |