-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: sachintendulkar576123 <[email protected]>
- Loading branch information
1 parent
9e09166
commit 50f8992
Showing
1 changed file
with
113 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
|
||
|
||
import bs4 | ||
from langchain import hub | ||
from langchain.chains import create_retrieval_chain | ||
from langchain.chains.combine_documents import create_stuff_documents_chain | ||
from langchain_chroma import Chroma | ||
from langchain_community.document_loaders import WebBaseLoader | ||
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser | ||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder | ||
from langchain_core.runnables import RunnablePassthrough | ||
from langchain_openai import ChatOpenAI, OpenAIEmbeddings | ||
from langchain_text_splitters import RecursiveCharacterTextSplitter | ||
from monocle_apptrace.instrumentor import set_context_properties, setup_monocle_telemetry | ||
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter | ||
from langhchain_patch import create_history_aware_retriever | ||
from monocle_apptrace.exporters.aws.s3_exporter import S3SpanExporter | ||
import logging | ||
logging.basicConfig(level=logging.INFO) | ||
import os | ||
import time | ||
from dotenv import load_dotenv, dotenv_values | ||
load_dotenv() | ||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") | ||
exporter = S3SpanExporter( | ||
region_name='us-east-1', | ||
bucket_name='sachin-dev' | ||
) | ||
setup_monocle_telemetry( | ||
workflow_name="langchain_app_1", | ||
span_processors=[BatchSpanProcessor(exporter)], | ||
wrapper_methods=[]) | ||
|
||
llm = ChatOpenAI(model="gpt-3.5-turbo-0125",api_key=OPENAI_API_KEY) | ||
|
||
# Load, chunk and index the contents of the blog. | ||
loader = WebBaseLoader( | ||
web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), | ||
bs_kwargs=dict( | ||
parse_only=bs4.SoupStrainer( | ||
class_=("post-content", "post-title", "post-header") | ||
) | ||
), | ||
) | ||
docs = loader.load() | ||
|
||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) | ||
splits = text_splitter.split_documents(docs) | ||
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY)) | ||
|
||
# Retrieve and generate using the relevant snippets of the blog. | ||
retriever = vectorstore.as_retriever() | ||
prompt = hub.pull("rlm/rag-prompt") | ||
|
||
def format_docs(docs):
    """Concatenate the page contents of *docs*, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
|
||
rag_chain = ( | ||
{"context": retriever | format_docs, "question": RunnablePassthrough()} | ||
| prompt | ||
| llm | ||
| StrOutputParser() | ||
) | ||
|
||
|
||
contextualize_q_system_prompt = """Given a chat history and the latest user question \ | ||
which might reference context in the chat history, formulate a standalone question \ | ||
which can be understood without the chat history. Do NOT answer the question, \ | ||
just reformulate it if needed and otherwise return it as is.""" | ||
contextualize_q_prompt = ChatPromptTemplate.from_messages( | ||
[ | ||
("system", contextualize_q_system_prompt), | ||
MessagesPlaceholder("chat_history"), | ||
("human", "{input}"), | ||
] | ||
) | ||
history_aware_retriever = create_history_aware_retriever( | ||
llm, retriever, contextualize_q_prompt | ||
) | ||
|
||
qa_system_prompt = """You are an assistant for question-answering tasks. \ | ||
Use the following pieces of retrieved context to answer the question. \ | ||
If you don't know the answer, just say that you don't know. \ | ||
Use three sentences maximum and keep the answer concise.\ | ||
{context}""" | ||
qa_prompt = ChatPromptTemplate.from_messages( | ||
[ | ||
("system", qa_system_prompt), | ||
MessagesPlaceholder("chat_history"), | ||
("human", "{input}"), | ||
] | ||
) | ||
|
||
|
||
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt) | ||
|
||
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain) | ||
|
||
chat_history = [] | ||
|
||
set_context_properties({"session_id": "0x4fa6d91d1f2a4bdbb7a1287d90ec4a16"}) | ||
|
||
question = "What is Task Decomposition?" | ||
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history}) | ||
print(ai_msg_1["answer"]) | ||
chat_history.extend([HumanMessage(content=question), ai_msg_1["answer"]]) | ||
|
||
second_question = "What are common ways of doing it?" | ||
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history}) | ||
|
||
print(ai_msg_2["answer"]) |