Genie tracing

Signed-off-by: Prithvi Kannan <[email protected]>
databricks · Dec 12, 2024 · cda59a7 · cda59a7
1 parent c9946ad
commit cda59a7
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 4 deletions.
diff --git a/integrations/langchain/README.md b/integrations/langchain/README.md
@@ -14,7 +14,7 @@ pip install databricks-langchain
 ### Install from source
 
 ```sh
-pip install git+ssh://git@github.com/databricks/databricks-ai-bridge.git#subdirectory=integrations/langchain
+pip install git+https://git@github.com/databricks/databricks-ai-bridge.git#subdirectory=integrations/langchain
 ```
 
 ## Get started

diff --git a/integrations/langchain/src/databricks_langchain/genie.py b/integrations/langchain/src/databricks_langchain/genie.py
@@ -1,6 +1,7 @@
 from databricks_ai_bridge.genie import Genie
+import mlflow
 
-
+@mlflow.trace()
 def _concat_messages_array(messages):
     concatenated_message = "\n".join(
         [
@@ -12,7 +13,7 @@ def _concat_messages_array(messages):
     )
     return concatenated_message
 
-
+@mlflow.trace()
 def _query_genie_as_agent(input, genie_space_id, genie_agent_name):
     from langchain_core.messages import AIMessage
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
   "pandas",
   "tiktoken>=0.8.0",
   "tabulate",
+  "mlflow",
 ]
 
 [project.license]

diff --git a/src/databricks_ai_bridge/genie.py b/src/databricks_ai_bridge/genie.py
@@ -7,6 +7,8 @@
 import tiktoken
 from databricks.sdk import WorkspaceClient
 
+import mlflow
+
 MAX_TOKENS_OF_DATA = 20000  # max tokens of data in markdown format
 MAX_ITERATIONS = 50  # max times to poll the API when polling for either result or the query results, each iteration is ~1 second, so max latency == 2 * MAX_ITERATIONS
 
@@ -16,7 +18,7 @@ def _count_tokens(text):
     encoding = tiktoken.encoding_for_model("gpt-4o")
     return len(encoding.encode(text))
 
-
+@mlflow.trace()
 def _parse_query_result(resp) -> Union[str, pd.DataFrame]:
     columns = resp["manifest"]["schema"]["columns"]
     header = [str(col["name"]) for col in columns]
@@ -92,6 +94,7 @@ def create_message(self, conversation_id, content):
         return resp
 
     def poll_for_result(self, conversation_id, message_id):
+        @mlflow.trace()
         def poll_result():
             iteration_count = 0
             while iteration_count < MAX_ITERATIONS: