Skip to content

Commit

Permalink
update vectorstore deployment url
Browse files Browse the repository at this point in the history
Signed-off-by: sachintendulkar576123 <[email protected]>
  • Loading branch information
sachintendulkar576123 committed Nov 26, 2024
1 parent 388209f commit 3df46da
Show file tree
Hide file tree
Showing 9 changed files with 896 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@
{
"_comment": "vector store name and type",
"attribute": "name",
"accessor": "lambda arguments: arguments['instance'].__dict__.get(\"document_store\").__class__.__name__"
"accessor": "lambda arguments: resolve_from_alias(arguments['instance'].__dict__, ['document_store', '_document_store']).__class__.__name__"
},
{
"attribute": "type",
"accessor": "lambda arguments: 'vectorstore.'+arguments['instance'].__dict__.get(\"document_store\").__class__.__name__"
"accessor": "lambda arguments: 'vectorstore.'+resolve_from_alias(arguments['instance'].__dict__, ['document_store', '_document_store']).__class__.__name__"
},
{
"attribute": "deployment",
"accessor": "lambda arguments: get_vectorstore_deployment(resolve_from_alias(arguments['instance'].__dict__, ['document_store', '_document_store']).__dict__)"
}
],
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
{
"attribute": "type",
"accessor": "lambda arguments: 'vectorstore.'+type(arguments['instance'].vectorstore).__name__"
},
{
"attribute": "deployment",
"accessor": "lambda arguments: get_vectorstore_deployment(arguments['instance'].vectorstore.__dict__)"
}
],
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
{
"attribute": "type",
"accessor": "lambda arguments: 'vectorstore.'+type(arguments['instance']._vector_store).__name__"
},
{
"attribute": "deployment",
"accessor": "lambda arguments: get_vectorstore_deployment(arguments['instance']._vector_store)"
}
],
[
Expand Down
9 changes: 9 additions & 0 deletions src/monocle_apptrace/metamodel/maps/haystack_methods.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
{
"wrapper_methods" : [
{
"package": "haystack_integrations.components.retrievers.opensearch",
"object": "OpenSearchEmbeddingRetriever",
"method": "run",
"span_name": "haystack.retriever",
"wrapper_package": "wrap_common",
"wrapper_method": "task_wrapper",
"output_processor": ["metamodel/maps/attributes/retrieval/haystack_entities.json"]
},
{
"package": "haystack.components.retrievers.in_memory",
"object": "InMemoryEmbeddingRetriever",
Expand Down
34 changes: 34 additions & 0 deletions src/monocle_apptrace/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,37 @@ def get_workflow_name(span: Span) -> str:
except Exception as e:
logger.exception(f"Error getting workflow name: {e}")
return None

def get_vectorstore_deployment(my_map):
    """Best-effort extraction of a vector store's deployment endpoint.

    Args:
        my_map: either the ``__dict__`` of a vector-/document-store instance
            (a plain ``dict``) or the store object itself.

    Returns:
        The deployment endpoint as a string — either a raw host/URL or a
        ``"host:port"`` pair — or ``None`` when no endpoint can be found.
    """
    if isinstance(my_map, dict):
        # Chroma-style stores keep connection info on `_client_settings`.
        if '_client_settings' in my_map:
            endpoint = _join_host_port(my_map['_client_settings'].__dict__)
            if endpoint is not None:
                return endpoint
        # OpenSearch-style stores expose a low-level client whose transport
        # tracks seed connections; the first one carries the host URL.
        for client_key in ('client', '_client'):
            if client_key in my_map:
                connection = my_map[client_key].transport.seed_connections[0].__dict__
                if 'host' in connection:
                    return connection['host']
    else:
        # Some stores wrap a client object that records the endpoint directly.
        if hasattr(my_map, 'client') and '_endpoint' in my_map.client.__dict__:
            return my_map.client.__dict__['_endpoint']
        return _join_host_port(my_map.__dict__)
    return None


def _join_host_port(attributes):
    """Scan an attribute dict for host/port-like entries; join the first of each.

    Returns ``"host:port"``, or ``None`` when either piece is missing.  (The
    original checked ``host is not None`` on a list — always true — and then
    indexed into possibly-empty lists, raising IndexError.)
    """
    hosts, ports = [], []
    for key, value in attributes.items():
        if value is not None:
            if "host" in key:
                hosts.append(value)
            elif "port" in key:
                ports.append(value)
    if hosts and ports:
        return hosts[0] + ":" + str(ports[0])
    return None
2 changes: 1 addition & 1 deletion src/monocle_apptrace/wrap_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from opentelemetry.trace import Tracer
from opentelemetry.sdk.trace import Span
from monocle_apptrace.utils import resolve_from_alias, with_tracer_wrapper, get_embedding_model, get_attribute, get_workflow_name, set_embedding_model, set_app_hosting_identifier_attribute
from monocle_apptrace.utils import set_attribute
from monocle_apptrace.utils import set_attribute, get_vectorstore_deployment
from monocle_apptrace.utils import get_fully_qualified_class_name, flatten_dict, get_nested_value
logger = logging.getLogger(__name__)
WORKFLOW_TYPE_KEY = "workflow_type"
Expand Down
218 changes: 218 additions & 0 deletions tests/haystack_opensearch_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
import os
from datasets import load_dataset
from haystack import Document, Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.embedders import (
SentenceTransformersDocumentEmbedder,
SentenceTransformersTextEmbedder,
)
from haystack.components.generators import OpenAIGenerator
from haystack_integrations.components.retrievers.opensearch import OpenSearchEmbeddingRetriever
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore

from haystack.document_stores.types import DuplicatePolicy
from haystack.utils import Secret
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
from monocle_apptrace.instrumentor import setup_monocle_telemetry
from monocle_apptrace.wrap_common import llm_wrapper, task_wrapper
from monocle_apptrace.wrapper import WrapperMethod

def haystack_app():
    """Build and run a Haystack RAG pipeline backed by an OpenSearch vector store.

    Indexes the public "seven-wonders" dataset into OpenSearch using
    sentence-transformers embeddings, then answers one fixed question through an
    embedder -> retriever -> prompt-builder -> OpenAI-generator pipeline.
    Monocle telemetry traces the run and prints spans to the console.
    """

    # Enable Monocle tracing for this workflow; spans are batched and written
    # to stdout.  No extra wrapper methods beyond the built-in metamodel maps.
    setup_monocle_telemetry(
        workflow_name="haystack_app_1",
        span_processors=[BatchSpanProcessor(ConsoleSpanExporter())],
        wrapper_methods=[


        ])

    # initialize

    api_key = os.getenv("OPENAI_API_KEY")
    # NOTE(review): the OpenSearch username/password and cluster URL below are
    # hard-coded and now public in version control — rotate these credentials
    # and load them from the environment instead.
    http_auth=("sachin-opensearch", "Sachin@123")
    generator = OpenAIGenerator(
        api_key=Secret.from_token(api_key), model="gpt-3.5-turbo"
    )
    document_store = OpenSearchDocumentStore(hosts="https://search-sachin-opensearch-cvvd5pdeyrme2l2y26xmcpkm2a.us-east-1.es.amazonaws.com", use_ssl=True,
        verify_certs=True, http_auth=http_auth)
    model = "sentence-transformers/all-mpnet-base-v2"

    # documents = [Document(content="There are over 7,000 languages spoken around the world today."),
    #              Document(content="Elephants have been observed to behave in a way that indicates a high level of self-awareness, such as recognizing themselves in mirrors."),
    #              Document(content="In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness the phenomenon of bioluminescent waves.")]

    # Embed every document in the dataset before writing it to the store.
    dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
    documents = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]
    document_embedder = SentenceTransformersDocumentEmbedder(model=model)
    document_embedder.warm_up()
    documents_with_embeddings = document_embedder.run(documents)

    # SKIP avoids re-writing documents that already exist in the index.
    document_store.write_documents(documents_with_embeddings.get("documents"), policy=DuplicatePolicy.SKIP)


    # embedder to embed user query
    text_embedder = SentenceTransformersTextEmbedder(
        model="sentence-transformers/all-mpnet-base-v2"
    )

    # get relevant documents from embedded query
    retriever = OpenSearchEmbeddingRetriever(document_store=document_store)

    # use documents to build the prompt
    template = """
    Given the following information, answer the question.
    Context:
    {% for document in documents %}
    {{ document.content }}
    {% endfor %}
    Question: {{question}}
    Answer:
    """

    prompt_builder = PromptBuilder(template=template)

    basic_rag_pipeline = Pipeline()
    # Add components to your pipeline
    basic_rag_pipeline.add_component("text_embedder", text_embedder)
    basic_rag_pipeline.add_component("retriever", retriever)
    basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
    basic_rag_pipeline.add_component("llm", generator)

    # Now, connect the components to each other
    basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
    basic_rag_pipeline.connect("prompt_builder", "llm")

    question = "What does Rhodes Statue look like?"

    # Run the full pipeline: embed the question, retrieve context from
    # OpenSearch, build the prompt, and generate an answer with the LLM.
    response = basic_rag_pipeline.run(
        {"text_embedder": {"text": question}, "prompt_builder": {"question": question}}
    )

    # print(response["llm"]["replies"][0])


haystack_app()

# {
# "name": "haystack.retriever",
# "context": {
# "trace_id": "0xa599cf84e013b83c58e3afaf8a7058f8",
# "span_id": "0x90b01a17810b9b38",
# "trace_state": "[]"
# },
# "kind": "SpanKind.INTERNAL",
# "parent_id": "0x557fc857283d8651",
# "start_time": "2024-11-26T09:52:00.845732Z",
# "end_time": "2024-11-26T09:52:01.742785Z",
# "status": {
# "status_code": "UNSET"
# },
# "attributes": {
# "span.type": "retrieval",
# "entity.count": 2,
# "entity.1.name": "OpenSearchDocumentStore",
# "entity.1.type": "vectorstore.OpenSearchDocumentStore",
# "entity.1.deployment": "https://search-sachin-opensearch-cvvd5pdeyrme2l2y26xmcpkm2a.us-east-1.es.amazonaws.com:443",
# "entity.2.name": "sentence-transformers/all-mpnet-base-v2",
# "entity.2.type": "model.embedding.sentence-transformers/all-mpnet-base-v2"
# },
# "events": [],
# "links": [],
# "resource": {
# "attributes": {
# "service.name": "haystack_app_1"
# },
# "schema_url": ""
# }
# }
# {
# "name": "haystack.components.generators.openai.OpenAIGenerator",
# "context": {
# "trace_id": "0xa599cf84e013b83c58e3afaf8a7058f8",
# "span_id": "0x1de03fa69ab19977",
# "trace_state": "[]"
# },
# "kind": "SpanKind.INTERNAL",
# "parent_id": "0x557fc857283d8651",
# "start_time": "2024-11-26T09:52:01.742785Z",
# "end_time": "2024-11-26T09:52:03.804858Z",
# "status": {
# "status_code": "UNSET"
# },
# "attributes": {
# "span.type": "inference",
# "entity.count": 2,
# "entity.1.type": "inference.azure_oai",
# "entity.1.inference_endpoint": "https://api.openai.com/v1/",
# "entity.2.name": "gpt-3.5-turbo",
# "entity.2.type": "model.llm.gpt-3.5-turbo"
# },
# "events": [
# {
# "name": "metadata",
# "timestamp": "2024-11-26T09:52:03.804858Z",
# "attributes": {
# "completion_tokens": 126,
# "prompt_tokens": 2433,
# "total_tokens": 2559
# }
# }
# ],
# "links": [],
# "resource": {
# "attributes": {
# "service.name": "haystack_app_1"
# },
# "schema_url": ""
# }
# }
# {
# "name": "haystack.core.pipeline.pipeline.Pipeline",
# "context": {
# "trace_id": "0xa599cf84e013b83c58e3afaf8a7058f8",
# "span_id": "0x557fc857283d8651",
# "trace_state": "[]"
# },
# "kind": "SpanKind.INTERNAL",
# "parent_id": null,
# "start_time": "2024-11-26T09:52:00.681588Z",
# "end_time": "2024-11-26T09:52:03.805858Z",
# "status": {
# "status_code": "UNSET"
# },
# "attributes": {
# "monocle_apptrace.version": "0.3.0",
# "span.type": "workflow",
# "entity.1.name": "haystack_app_1",
# "entity.1.type": "workflow.haystack"
# },
# "events": [
# {
# "name": "data.input",
# "timestamp": "2024-11-26T09:52:00.684591Z",
# "attributes": {
# "question": "What does Rhodes Statue look like?"
# }
# },
# {
# "name": "data.output",
# "timestamp": "2024-11-26T09:52:03.805858Z",
# "attributes": {
# "response": [
# "The Rhodes Statue was a colossal statue of the Greek sun-god Helios, standing approximately 33 meters (108 feet) high. It featured a standard rendering of a head with curly hair and spikes of bronze or silver flame radiating from it. The statue was constructed with iron tie bars and brass plates to form the skin, and filled with stone blocks during construction. The statue collapsed at the knees during an earthquake in 226 BC and remained on the ground for over 800 years. It was ultimately destroyed and the remains were sold. The exact appearance of the statue, aside from its size and head details, is unknown."
# ]
# }
# }
# ],
# "links": [],
# "resource": {
# "attributes": {
# "service.name": "haystack_app_1"
# },
# "schema_url": ""
# }
# }
Loading

0 comments on commit 3df46da

Please sign in to comment.