Skip to content

Commit

Permalink
Merge pull request #34 from monocle2ai/kshitiz/update_context_key
Browse files Browse the repository at this point in the history
Update key for session context field in attributes
  • Loading branch information
kshitiz-okahu authored Aug 30, 2024
2 parents 905833a + 6033fe0 commit 3bacec1
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 38 deletions.
8 changes: 4 additions & 4 deletions src/monocle_apptrace/instrumentor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry import trace
from opentelemetry.context import get_value, attach, set_value
from monocle_apptrace.wrap_common import CONTEXT_PROPERTIES_KEY
from monocle_apptrace.wrap_common import SESSION_PROPERTIES_KEY
from monocle_apptrace.wrapper import INBUILT_METHODS_LIST, WrapperMethod
from monocle_apptrace.exporters.file_exporter import FileSpanExporter

Expand Down Expand Up @@ -113,12 +113,12 @@ def setup_monocle_telemetry(


def on_processor_start(span: Span, parent_context):
context_properties = get_value(CONTEXT_PROPERTIES_KEY)
context_properties = get_value(SESSION_PROPERTIES_KEY)
if context_properties is not None:
for key, value in context_properties.items():
span.set_attribute(
f"{CONTEXT_PROPERTIES_KEY}.{key}", value
f"{SESSION_PROPERTIES_KEY}.{key}", value
)

def set_context_properties(properties: dict) -> None:
attach(set_value(CONTEXT_PROPERTIES_KEY, properties))
attach(set_value(SESSION_PROPERTIES_KEY, properties))
2 changes: 1 addition & 1 deletion src/monocle_apptrace/wrap_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
QUERY = "question"
RESPONSE = "response"
TAGS = "tags"
CONTEXT_PROPERTIES_KEY = "workflow_context_properties"
SESSION_PROPERTIES_KEY = "session"
INFRA_SERVICE_KEY = "infra_service_name"


Expand Down
4 changes: 2 additions & 2 deletions tests/langchain_async_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
setup_monocle_telemetry,
)
from monocle_apptrace.wrap_common import (
CONTEXT_PROPERTIES_KEY,
SESSION_PROPERTIES_KEY,
PROMPT_INPUT_KEY,
PROMPT_OUTPUT_KEY,
QUERY,
Expand Down Expand Up @@ -168,7 +168,7 @@ def get_event_attributes(events, key):

assert input_event_attributes[QUERY] == query
assert output_event_attributes[RESPONSE] == Test.ragText
assert root_span_attributes[f"{CONTEXT_PROPERTIES_KEY}.{context_key}"] == context_value
assert root_span_attributes[f"{SESSION_PROPERTIES_KEY}.{context_key}"] == context_value

for spanObject in dataJson['batch']:
assert not spanObject["context"]["span_id"].startswith("0x")
Expand Down
50 changes: 24 additions & 26 deletions tests/langchain_chat_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from langchain_text_splitters import RecursiveCharacterTextSplitter
from monocle_apptrace.instrumentor import set_context_properties, setup_monocle_telemetry
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
from temp_langchain import create_history_aware_retriever
from langhchain_patch import create_history_aware_retriever

setup_monocle_telemetry(
workflow_name="langchain_app_1",
Expand Down Expand Up @@ -154,8 +154,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "workflow_context_input": "What is Task Decomposition?"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -181,7 +180,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -207,7 +206,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -233,7 +232,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -259,7 +258,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -285,7 +284,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "server_url": "http://triton22.eastus.cloudapp.azure.com:8000/v2/models/flan_t5_783m/versions/1/infer",
# "completion_tokens": 57,
# "prompt_tokens": 580,
Expand Down Expand Up @@ -315,7 +314,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -341,7 +340,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -367,7 +366,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -393,7 +392,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "workflow_input": "What is Task Decomposition?",
# "workflow_name": "langchain_app_1",
# "workflow_output": "Task decomposition is a technique used to break down complex tasks into smaller and more manageable steps. This process helps agents or models handle intricate tasks by dividing them into simpler subtasks. Various methods, such as Chain of Thought and Tree of Thoughts, can be employed to decompose tasks effectively.",
Expand Down Expand Up @@ -423,7 +422,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -449,7 +448,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "server_url": "http://triton22.eastus.cloudapp.azure.com:8000/v2/models/flan_t5_783m/versions/1/infer",
# "completion_tokens": 10,
# "prompt_tokens": 140,
Expand Down Expand Up @@ -479,7 +478,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -505,8 +504,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "workflow_context_input": "What are some common methods used for task decomposition?"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -532,7 +530,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -558,7 +556,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -584,7 +582,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -610,7 +608,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -636,7 +634,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "server_url": "http://triton22.eastus.cloudapp.azure.com:8000/v2/models/flan_t5_783m/versions/1/infer",
# "completion_tokens": 63,
# "prompt_tokens": 619,
Expand Down Expand Up @@ -666,7 +664,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -692,7 +690,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "events": [],
# "links": [],
Expand All @@ -718,7 +716,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"
# },
# "resource": {
# "attributes": {
Expand All @@ -742,7 +740,7 @@ def format_docs(docs):
# "status_code": "UNSET"
# },
# "attributes": {
# "workflow_context_properties.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "session.session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16",
# "workflow_input": "What are common ways of doing it?",
# "workflow_name": "langchain_app_1",
# "workflow_output": "Task decomposition can be achieved through methods such as using Language Model (LLM) prompting with specific instructions like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\", providing task-specific instructions, or incorporating human inputs. These approaches help in breaking down tasks into smaller components for easier handling and execution.",
Expand Down
1 change: 0 additions & 1 deletion tests/langchain_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ def format_docs(docs):
# "trace_id": "0x4d297d14b25c3891eb4dd8b28453e91a",
# "parent_id": "0x7f0f48ee79169b5f",
# "attributes": {
# "workflow_context_input": "What is Task Decomposition?"
# },
# "events": []
# }
4 changes: 2 additions & 2 deletions tests/langchain_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
setup_monocle_telemetry,
)
from monocle_apptrace.wrap_common import (
CONTEXT_PROPERTIES_KEY,
SESSION_PROPERTIES_KEY,
INFRA_SERVICE_KEY,
PROMPT_INPUT_KEY,
PROMPT_OUTPUT_KEY,
Expand Down Expand Up @@ -168,7 +168,7 @@ def get_event_attributes(events, key):

assert input_event_attributes[QUERY] == query
assert output_event_attributes[RESPONSE] == TestHandler.ragText
assert root_span_attributes[f"{CONTEXT_PROPERTIES_KEY}.{context_key}"] == context_value
assert root_span_attributes[f"{SESSION_PROPERTIES_KEY}.{context_key}"] == context_value
assert root_span_attributes[INFRA_SERVICE_KEY] == test_output_infra

for spanObject in dataJson['batch']:
Expand Down
67 changes: 67 additions & 0 deletions tests/langhchain_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

from langchain_core.language_models import LanguageModelLike
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import BasePromptTemplate
from langchain_core.retrievers import RetrieverLike, RetrieverOutputLike
from langchain_core.runnables import RunnableBranch


def create_history_aware_retriever(
    llm: LanguageModelLike,
    retriever: RetrieverLike,
    prompt: BasePromptTemplate,
) -> RetrieverOutputLike:
    """Create a chain that takes conversation history and returns documents.

    If there is no ``chat_history``, then the ``input`` is passed directly
    to the retriever. If there is ``chat_history``, then the prompt and LLM
    will be used to generate a search query. That search query is then
    passed to the retriever.

    Args:
        llm: Language model to use for generating a search term given chat
            history.
        retriever: RetrieverLike object that takes a string as input and
            outputs a list of Documents.
        prompt: The prompt used to generate the search query for the
            retriever.

    Returns:
        An LCEL Runnable. The runnable input must take in ``input``, and if
        there is chat history it should take it in the form of
        ``chat_history``. The Runnable output is a list of Documents.

    Raises:
        ValueError: If ``input`` is not one of the prompt's input variables.

    Example:
        .. code-block:: python

            # pip install -U langchain langchain-community
            from langchain_community.chat_models import ChatOpenAI
            from langchain.chains import create_history_aware_retriever
            from langchain import hub

            rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")
            llm = ChatOpenAI()
            retriever = ...
            chat_retriever_chain = create_history_aware_retriever(
                llm, retriever, rephrase_prompt
            )
            chat_retriever_chain.invoke({"input": "...", "chat_history": []})
    """
    # Fail fast: the rephrasing prompt must be able to accept the question.
    if "input" not in prompt.input_variables:
        raise ValueError(
            "Expected `input` to be a prompt variable, "
            f"but got {prompt.input_variables}"
        )

    retrieve_documents: RetrieverOutputLike = RunnableBranch(
        (
            # Both empty string and empty list evaluate to False.
            lambda x: not x.get("chat_history", False),
            # No chat history: pass the raw input straight to the retriever.
            (lambda x: x["input"]) | retriever,
        ),
        # With chat history: rephrase the question via the LLM, then retrieve.
        prompt | llm | StrOutputParser() | retriever,
    ).with_config(run_name="chat_retriever_chain")
    return retrieve_documents
2 changes: 0 additions & 2 deletions tests/llama_index_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@
# "trace_id": "0xbd54e5d0edcd96634fa8a02c25c27519",
# "parent_id": "0xb4b14a8f14e7e770",
# "attributes": {
# "workflow_context_input": "What did the author do growing up?",
# "workflow_context_output": "this is some sample text"
# },
# "events": []
# },
Expand Down

0 comments on commit 3bacec1

Please sign in to comment.